From 9299c48d1663416b5da4840b121f71310cd1282f Mon Sep 17 00:00:00 2001 From: tripu Date: Tue, 15 Mar 2016 17:20:56 +0900 Subject: [PATCH 01/23] Restructure tests: split in files, start API tests --- test/api.js | 66 ++++++++++++++++++ test/l10n.js | 35 ++++++++++ test/{all-rules.js => rules.js} | 117 ++++++++++---------------------- test/validation.js | 22 ++++++ 4 files changed, 160 insertions(+), 80 deletions(-) create mode 100644 test/api.js create mode 100644 test/l10n.js rename test/{all-rules.js => rules.js} (77%) create mode 100644 test/validation.js diff --git a/test/api.js b/test/api.js new file mode 100644 index 000000000..58a75d7a9 --- /dev/null +++ b/test/api.js @@ -0,0 +1,66 @@ +/** + * Test the API. + */ + +// Settings: +const SPECS = [ + {url: 'https://www.w3.org/TR/2016/WD-appmanifest-20160312/', profile: 'WD'} +, {url: 'https://www.w3.org/TR/2016/CR-WebIDL-1-20160308/', profile: 'CR'} +, {url: 'https://www.w3.org/TR/2016/PR-ttml-imsc1-20160308/', profile: 'PR'} +, {url: 'https://www.w3.org/TR/2016/NOTE-csvw-ucr-20160225/', profile: 'WG-NOTE'} +, {url: 'https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/', profile: 'REC'} +, {url: 'https://www.w3.org/TR/2015/WD-tracking-compliance-20150714/', profile: 'LC'} +]; + +// External packages: +const expect = require('expect.js'); + +// Internal packages: +const validator = require('../lib/validator') +, sink = require('../lib/sink') +, profileMetadata = require('../lib/profiles/metadata') +; + +/** + * Assert that the profile detected in a spec is equal to the known profile. + * + * @param {String} url - public URL of a spec. + * @param {String} profile - profile that should be detected. 
+ */ + +const detect = function(url, profile) { + const specberus = new validator.Specberus + , handler = new sink.Sink + ; + handler.on('exception', function () { + }); + handler.on('done', function () { + }); + const opts = {events: handler, profile: profileMetadata, url: url}; + it('should detect a ' + profile, function (done) { + handler.on('end-all', function () { + expect(specberus.detectedProfile).to.equal(profile); + done(); + }); + specberus.validate(opts); + }); +}; + +// We only add these test cases when testing locally; see comment in "validation.js". +if (!process || !process.env || (process.env.TRAVIS !== 'true' && !process.env.SKIP_NETWORK)) { + + describe('API', function() { + + describe('Method "metadata"', function() { + for(var i in SPECS) { + detect(SPECS[i].url, SPECS[i].profile); + } + }); + + describe('Method "validate"', function() { + // @TODO + }); + + }); + +} diff --git a/test/l10n.js b/test/l10n.js new file mode 100644 index 000000000..bb8f98a50 --- /dev/null +++ b/test/l10n.js @@ -0,0 +1,35 @@ +/** + * Test L10n features. + */ + +// External packages: +const expect = require('expect.js'); + +// Internal packages: +const wording = require('../lib/rules') +, selectors = require('../lib/l10n-selectors') +; + +describe('L10n', function() { + + describe('UI messages module', function() { + it('Should be a valid object', function() { + expect(wording).to.be.an('object'); + }); + }); + + describe('Selectors module', function() { + const s = selectors.selectors; + it('Should be a valid object', function() { + expect(s).to.be.an('object'); + }); + it('Should contain only selectors that resolve correctly', function() { + var message; + Object.keys(s).forEach(function (key) { + message = eval('wording.' 
+ [s[key]]); + expect(message).to.be.a('string'); + }); + }); + }); + +}); diff --git a/test/all-rules.js b/test/rules.js similarity index 77% rename from test/all-rules.js rename to test/rules.js index 5be181468..ae66be503 100644 --- a/test/all-rules.js +++ b/test/rules.js @@ -1,14 +1,20 @@ +/** + * Test the rules. + */ -var Specberus = require("../lib/validator").Specberus -, validator = new Specberus() -, selectors = require("../lib/l10n-selectors").selectors -, wording = require("../lib/rules") -, expect = require("expect.js") -, pth = require("path") -, events = require("events") -, util = require("util") -, networkCats = "validation".split(" ") -, DEBUG = false +// Settings: +const DEBUG = false; + +// Native packages: +const pth = require('path'); + +// External packages: +const expect = require('expect.js'); + +// Internal packages: +const validation = require('./validation') +, validator = require('../lib/validator') +, sink = require('../lib/sink') ; var tests = { @@ -215,35 +221,10 @@ var tests = { , { doc: "heuristic/dated-url.html" } ] } +, validation: validation }; -// HTML and CSS validations often time out, and Travis CI thinks the build is broken when it happens. -// Therefore, we only add these test cases when testing locally. 
-// See https://github.com/w3c/specberus/issues/164 and http://docs.travis-ci.com/user/ci-environment/#Environment-variables -if (process.env.TRAVIS !== 'true') { - tests.validation = { - css: [ - { doc: "validation/simple.html", ignoreWarnings: true } - , { doc: "validation/css.html", ignoreWarnings: true } - , { doc: "validation/bad-css.html", errors: ["validation.css", "validation.css"], ignoreWarnings: true } - ] - , html: [ - { doc: "validation/simple.html" } - , { doc: "validation/invalid.html", errors: ["validation.html"] } - ] - }; -} - -function Sink () { - this.ok = 0; - this.errors = []; - this.warnings = []; - this.done = 0; -} -util.inherits(Sink, events.EventEmitter); - Object.keys(tests).forEach(function (category) { - if (process.env.SKIP_NETWORK && networkCats.indexOf(category) > -1) return; describe("Category " + category, function () { Object.keys(tests[category]).forEach(function (rule) { describe("Rule " + rule, function () { @@ -251,47 +232,47 @@ Object.keys(tests).forEach(function (category) { var passTest = test.errors ? false : true; it("should " + (passTest ? 
"pass" : "fail") + " for " + test.doc, function (done) { var r = require("../lib/rules/" + category + "/" + rule) - , sink = new Sink + , handler = new sink.Sink ; - sink.on("ok", function () { + handler.on("ok", function () { if (DEBUG) console.log("OK"); - sink.ok++; + handler.ok++; }); - sink.on("err", function (type, data) { + handler.on("err", function (type, data) { if (DEBUG) console.log(data); - sink.errors.push(type); + handler.errors.push(type); }); - sink.on("warning", function (type, data) { + handler.on("warning", function (type, data) { if (DEBUG) console.log("[W]", data); - sink.warnings.push(type); + handler.warnings.push(type); }); - sink.on("done", function () { + handler.on("done", function () { if (DEBUG) console.log("---done---"); - sink.done++; + handler.done++; }); - sink.on("exception", function (data) { + handler.on("exception", function (data) { console.error("[EXCEPTION] Validator had a massive failure: " + data.message); }); - sink.on("end-all", function () { + handler.on("end-all", function () { if (passTest) { - expect(sink.errors).to.be.empty(); - expect(sink.ok).to.eql(sink.done); + expect(handler.errors).to.be.empty(); + expect(handler.ok).to.eql(handler.done); } else { - expect(sink.errors.length).to.eql(test.errors.length); + expect(handler.errors.length).to.eql(test.errors.length); for (var i = 0, n = test.errors.length; i < n; i++) { - expect(sink.errors).to.contain(test.errors[i]); + expect(handler.errors).to.contain(test.errors[i]); } } if (!test.ignoreWarnings) { if (test.warnings) { - expect(sink.warnings.length).to.eql(test.warnings.length); + expect(handler.warnings.length).to.eql(test.warnings.length); for (var i = 0, n = test.warnings.length; i < n; i++) { - expect(sink.warnings).to.contain(test.warnings[i]); + expect(handler.warnings).to.contain(test.warnings[i]); } } else { - expect(sink.warnings).to.be.empty(); + expect(handler.warnings).to.be.empty(); } } done(); @@ -304,38 +285,14 @@ 
Object.keys(tests).forEach(function (category) { var options = { file: pth.join(__dirname, "docs", test.doc) , profile: profile - , events: sink + , events: handler }; for (var o in test.options) options[o] = test.options[o]; - validator.validate(options); + new validator.Specberus().validate(options); }); }); }); }); }); }); - -describe('l10n', function() { - - describe('UI messages module', function() { - it('should be a valid object', function() { - expect(wording).to.be.an('object'); - }); - }); - - describe('Selectors module', function() { - it('should be a valid object', function() { - expect(selectors).to.be.an('object'); - }); - it('should contain only selectors that resolve correctly', function() { - var message; - Object.keys(selectors).forEach(function (key) { - message = eval('wording.' + [selectors[key]]); - expect(message).to.be.a('string'); - }); - }); - }); - -}); - diff --git a/test/validation.js b/test/validation.js new file mode 100644 index 000000000..a8d886550 --- /dev/null +++ b/test/validation.js @@ -0,0 +1,22 @@ +/** + * Test HTML and CSS checkers. + * + * This file is not runnable by Mocha directly; it is used by "rules.js". + * + * HTML and CSS validations often time out, and Travis CI thinks the build is broken when it happens. + * Therefore, we only add these test cases when testing locally. + * See w3c/specberus#164 and + * Travis documentation. 
+ */ + +if (!process || !process.env || (process.env.TRAVIS !== 'true' && !process.env.SKIP_NETWORK)) { + exports.css = [ + {doc: 'validation/simple.html', ignoreWarnings: true} + , {doc: 'validation/css.html', ignoreWarnings: true} + , {doc: 'validation/bad-css.html', errors: ['validation.css', 'validation.css'], ignoreWarnings: true} + ]; + exports.html = [ + {doc: 'validation/simple.html'} + , {doc: 'validation/invalid.html', errors: ['validation.html']} + ]; +} From 56165c167c5550db03c068a60c2dadde1efcd3aa Mon Sep 17 00:00:00 2001 From: tripu Date: Tue, 15 Mar 2016 17:23:14 +0900 Subject: [PATCH 02/23] Add metadata rule, refactor sink, start API funcs --- app.js | 139 ++++++++++++++++++++++++++++++--------- lib/profiles/metadata.js | 6 ++ lib/rules/metadata.js | 47 +++++++++++++ lib/sink.js | 19 ++++++ lib/validator.js | 18 +++-- 5 files changed, 192 insertions(+), 37 deletions(-) create mode 100644 lib/profiles/metadata.js create mode 100644 lib/rules/metadata.js create mode 100644 lib/sink.js diff --git a/app.js b/app.js index 526a11874..dc7cf8474 100644 --- a/app.js +++ b/app.js @@ -1,24 +1,38 @@ -/*jshint es5: true*/ +/** + * Main runnable file of Specberus. 
+ */ -// Pseudo-constants: -var DEFAULT_PORT = 80; +// Settings: +const DEFAULT_PORT = 80; -// The Express and Socket.io server interface -var express = require("express") -, bodyParser = require('body-parser') +// Native packages: +const http = require('http'); + +// External packages: +const bodyParser = require('body-parser') , compression = require('compression') +, express = require('express') +, insafe = require('insafe') , morgan = require('morgan') -, app = express() -, server = require("http").createServer(app) -, io = require("socket.io").listen(server) -, Specberus = new require("./lib/validator").Specberus -, l10n = require("./lib/l10n") -, util = require("util") -, events = require("events") -, insafe = require("insafe") -, version = require("./package.json").version +, socket = require('socket.io') +; + +// Internal packages: +const package = require('./package.json') +, l10n = require('./lib/l10n') +, sink = require('./lib/sink') +, Specberus = new require('./lib/validator').Specberus +, profileMetadata = require('./lib/profiles/metadata') +; + +const app = express() +, server = http.createServer(app) +, io = socket.listen(server) , profiles = {} +, Sink = sink.Sink +, version = package.version ; + ("FPWD FPLC FPCR WD LC CR PR PER REC RSCND " + "CG-NOTE FPIG-NOTE IG-NOTE FPWG-NOTE WG-NOTE " + "WD-Echidna " + @@ -34,6 +48,71 @@ app.use(compression()); app.use(bodyParser.json()); app.use(express.static("public")); +app.post('/api/*', function(req, res) { + var v + , file + , profile + , handler + , options + ; + if ('/api/validate' === req.path) { + v = new Specberus(); + file = req.query.file; + profile = profiles[req.query.profile]; + handler = new Sink; + options = {file: file, events: handler, profile: profile}; + handler.on("ok", function () { + console.log("OK"); + }); + handler.on("err", function (type, data) { + console.log(data); + }); + handler.on("warning", function (type, data) { + console.log("[W]", data); + }); + handler.on("done", 
function () { + console.log("---done---"); + }); + handler.on("exception", function (data) { + console.error("[EXCEPTION] Validator had a massive failure: " + data.message); + }); + handler.on("end-all", function () { + console.log('All done'); + }); + v.validate(options); + } + else if ('/api/metadata' === req.path) { + v = new Specberus(); + file = req.query.file; + handler = new Sink; + options = {file: file, events: handler, profile: profileMetadata}; + handler.on("ok", function () { + console.log("OK"); + }); + handler.on("err", function (type, data) { + console.log(data); + }); + handler.on("warning", function (type, data) { + console.log("[W]", data); + }); + handler.on("done", function () { + console.log("---done---"); + }); + handler.on("exception", function (data) { + console.error("[EXCEPTION] Validator had a massive failure: " + data.message); + }); + handler.on("end-all", function () { + console.log('All done'); + console.log(v.detectedProfile); + }); + v.validate(options); + } + else { + res.status(404).send('Don\'t recognise "' + req.path + '"!'); + } + res.end(); +}); + // listen up server.listen(process.argv[2] || process.env.PORT || DEFAULT_PORT); @@ -49,8 +128,6 @@ server.listen(process.argv[2] || process.env.PORT || DEFAULT_PORT); // error, { name: "test name", code: "FOO" } // done, { name: "test name" } // finished -function Sink () {} -util.inherits(Sink, events.EventEmitter); io.sockets.on("connection", function (socket) { socket.emit("handshake", { version: version }); @@ -58,38 +135,38 @@ io.sockets.on("connection", function (socket) { if (!data.url) return socket.emit("exception", { message: "URL not provided." }); if (!data.profile) return socket.emit("exception", { message: "Profile not provided." }); if (!profiles[data.profile]) return socket.emit("exception", { message: "Profile does not exist." 
}); - var validator = new Specberus() - , sink = new Sink + var v = new Specberus() + , handler = new Sink , profile = profiles[data.profile] ; socket.emit("start", { rules: (profile.rules || []).map(function (rule) { return rule.name; }) }); - sink.on("ok", function (type) { + handler.on("ok", function (type) { socket.emit("ok", { name: type }); }); - sink.on("err", function (type, data) { + handler.on("err", function (type, data) { data.name = type; - data.message = l10n.message(validator.config.lang, type, data.key, data.extra); + data.message = l10n.message(v.config.lang, type, data.key, data.extra); socket.emit("err", data); }); - sink.on("warning", function (type, data) { + handler.on("warning", function (type, data) { data.name = type; - data.message = l10n.message(validator.config.lang, type, data.key, data.extra); + data.message = l10n.message(v.config.lang, type, data.key, data.extra); socket.emit("warning", data); }); - sink.on('info', function (type, data) { + handler.on('info', function (type, data) { data.name = type; - data.message = l10n.message(validator.config.lang, type, data.key, data.extra); + data.message = l10n.message(v.config.lang, type, data.key, data.extra); socket.emit('info', data); }); - sink.on("done", function (name) { + handler.on("done", function (name) { socket.emit("done", { name: name }); }); - sink.on("end-all", function () { + handler.on("end-all", function () { socket.emit("finished"); }); - sink.on("exception", function (data) { + handler.on("exception", function (data) { socket.emit("exception", data); }); insafe.check({ @@ -98,10 +175,10 @@ io.sockets.on("connection", function (socket) { }).then(function(res){ if(res.status) { try { - validator.validate({ + v.validate({ url: res.url , profile: profile - , events: sink + , events: handler , validation: data.validation , noRecTrack: data.noRecTrack , informativeOnly: data.informativeOnly diff --git a/lib/profiles/metadata.js b/lib/profiles/metadata.js new file mode 100644 
index 000000000..f64e6d67e --- /dev/null +++ b/lib/profiles/metadata.js @@ -0,0 +1,6 @@ +/** + * Pseudo-profile for metadata extraction. + */ + +exports.name = 'Metadata'; +exports.rules = [require('../rules/metadata')]; diff --git a/lib/rules/metadata.js b/lib/rules/metadata.js new file mode 100644 index 000000000..93b5109f2 --- /dev/null +++ b/lib/rules/metadata.js @@ -0,0 +1,47 @@ +/** + * Pseudo-rule for metadata extraction. + */ + +// Settings: +const SELECTOR_SUBTITLE = 'body div.head h2'; + +// Internal packages: +const profiles = require('../../public/data/profiles'); + +exports.name = 'metadata'; + +exports.check = function (sr, done) { + + var candidate + , track + , profile + , matchedLength = 0 + , id + , i + , j; + + sr.$(SELECTOR_SUBTITLE).each(function() { + candidate = sr.norm(sr.$(this).text()).toLowerCase(); + i = 0; + while (i < profiles.tracks.length) { + track = profiles.tracks[i].profiles; + j = 0; + while (j < track.length) { + profile = track[j]; + if (-1 !== candidate.indexOf(profile.name.toLowerCase()) && matchedLength < profile.name.length) { + id = profile.id; + matchedLength = profile.name.length; + } + j++; + } + i++; + } + }); + if (id) { + done({detectedProfile: id}); + } + else { + done(); + } + +}; diff --git a/lib/sink.js b/lib/sink.js new file mode 100644 index 000000000..89d607c99 --- /dev/null +++ b/lib/sink.js @@ -0,0 +1,19 @@ +/** + * Generic sink. + */ + +// Native packages: +const util = require('util') +, events = require('events') +; + +var Sink = function() { + this.ok = 0; + this.errors = []; + this.warnings = []; + this.done = 0; +}; + +util.inherits(Sink, events.EventEmitter); + +exports.Sink = Sink; diff --git a/lib/validator.js b/lib/validator.js index 9acaac2f7..844d3a0da 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -1,9 +1,10 @@ -/*jshint es5:true */ +/** + * Main file of the Specberus npm package. 
+ * + * The most useful source: + * http://services.w3.org/xslt?xmlfile=http://www.w3.org/2005/07/13-pubrules-src.html&xslfile=http://www.w3.org/2005/07/13-pubrules-compare.xsl + */ -// the most useful source: -// http://services.w3.org/xslt?xmlfile=http://www.w3.org/2005/07/13-pubrules-src.html&xslfile=http://www.w3.org/2005/07/13-pubrules-compare.xsl - -// Pseudo-constants: var DELIVERER_IDS_KEY = 'delivererIDs'; var whacko = require("whacko") @@ -78,7 +79,12 @@ Specberus.prototype.validate = function (options) { // XXX // I would like to catch all exceptions here, but this derails the testing // infrastructure which also uses exceptions that it expects aren't caught - rule.check(self, function () { + rule.check(self, function (result) { + if (result) { + for (var i in result) { + self[i] = result[i]; + } + } done++; if (!seenErrors[this.name]) self.sink.emit("ok", this.name); self.sink.emit('metadata', DELIVERER_IDS_KEY, self.delivererIDs); From 42d64478b917e26688cdbd8ebc1f7442c1190f67 Mon Sep 17 00:00:00 2001 From: tripu Date: Tue, 15 Mar 2016 18:02:44 +0900 Subject: [PATCH 03/23] Coding style --- lib/rules/metadata.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/rules/metadata.js b/lib/rules/metadata.js index 93b5109f2..8aec31bfd 100644 --- a/lib/rules/metadata.js +++ b/lib/rules/metadata.js @@ -18,7 +18,8 @@ exports.check = function (sr, done) { , matchedLength = 0 , id , i - , j; + , j + ; sr.$(SELECTOR_SUBTITLE).each(function() { candidate = sr.norm(sr.$(this).text()).toLowerCase(); From ad8dd362c253c35656426f03321ec91d333ba0b5 Mon Sep 17 00:00:00 2001 From: tripu Date: Tue, 15 Mar 2016 18:03:11 +0900 Subject: [PATCH 04/23] Test the API on Travis builds, too --- test/api.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/api.js b/test/api.js index 58a75d7a9..ab8b8d0dc 100644 --- a/test/api.js +++ b/test/api.js @@ -46,8 +46,7 @@ const detect = function(url, profile) { }); }; -// We only add these test cases 
when testing locally; see comment in "validation.js". -if (!process || !process.env || (process.env.TRAVIS !== 'true' && !process.env.SKIP_NETWORK)) { +if (!process || !process.env || !process.env.SKIP_NETWORK) { describe('API', function() { From 575e62252b4934ffd4a6011cfdd0473ba7a0b8a8 Mon Sep 17 00:00:00 2001 From: tripu Date: Tue, 15 Mar 2016 18:25:37 +0900 Subject: [PATCH 05/23] Code style: all packages imported as pure modules --- app.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app.js b/app.js index dc7cf8474..119f4ae4d 100644 --- a/app.js +++ b/app.js @@ -21,7 +21,7 @@ const bodyParser = require('body-parser') const package = require('./package.json') , l10n = require('./lib/l10n') , sink = require('./lib/sink') -, Specberus = new require('./lib/validator').Specberus +, validator = require('./lib/validator') , profileMetadata = require('./lib/profiles/metadata') ; @@ -56,7 +56,7 @@ app.post('/api/*', function(req, res) { , options ; if ('/api/validate' === req.path) { - v = new Specberus(); + v = new validator.Specberus; file = req.query.file; profile = profiles[req.query.profile]; handler = new Sink; @@ -82,7 +82,7 @@ app.post('/api/*', function(req, res) { v.validate(options); } else if ('/api/metadata' === req.path) { - v = new Specberus(); + v = new validator.Specberus; file = req.query.file; handler = new Sink; options = {file: file, events: handler, profile: profileMetadata}; @@ -135,7 +135,7 @@ io.sockets.on("connection", function (socket) { if (!data.url) return socket.emit("exception", { message: "URL not provided." }); if (!data.profile) return socket.emit("exception", { message: "Profile not provided." }); if (!profiles[data.profile]) return socket.emit("exception", { message: "Profile does not exist." 
}); - var v = new Specberus() + var v = new validator.Specberus , handler = new Sink , profile = profiles[data.profile] ; From 582270a80559251ccf5424e7e627461ea8c924ce Mon Sep 17 00:00:00 2001 From: tripu Date: Wed, 16 Mar 2016 22:31:05 +0900 Subject: [PATCH 06/23] Reworking things; too much to describe here --- app.js | 68 +-------- lib/api.js | 137 ++++++++++++++++++ lib/profiles/metadata.js | 6 +- lib/rules/metadata/deliverers.js | 35 +++++ .../{metadata.js => metadata/profile.js} | 8 +- lib/sink.js | 31 +++- lib/util.js | 30 ++++ lib/validator.js | 53 ++++++- test/api.js | 64 ++++---- test/rules.js | 87 ++++++++++- test/samples.json | 62 ++++++++ 11 files changed, 470 insertions(+), 111 deletions(-) create mode 100644 lib/api.js create mode 100644 lib/rules/metadata/deliverers.js rename lib/rules/{metadata.js => metadata/profile.js} (83%) create mode 100644 lib/util.js create mode 100644 test/samples.json diff --git a/app.js b/app.js index 119f4ae4d..2dd2a9f76 100644 --- a/app.js +++ b/app.js @@ -19,10 +19,10 @@ const bodyParser = require('body-parser') // Internal packages: const package = require('./package.json') +, api = require('./lib/api') , l10n = require('./lib/l10n') , sink = require('./lib/sink') , validator = require('./lib/validator') -, profileMetadata = require('./lib/profiles/metadata') ; const app = express() @@ -47,71 +47,7 @@ app.use(morgan('combined')); app.use(compression()); app.use(bodyParser.json()); app.use(express.static("public")); - -app.post('/api/*', function(req, res) { - var v - , file - , profile - , handler - , options - ; - if ('/api/validate' === req.path) { - v = new validator.Specberus; - file = req.query.file; - profile = profiles[req.query.profile]; - handler = new Sink; - options = {file: file, events: handler, profile: profile}; - handler.on("ok", function () { - console.log("OK"); - }); - handler.on("err", function (type, data) { - console.log(data); - }); - handler.on("warning", function (type, data) { - 
console.log("[W]", data); - }); - handler.on("done", function () { - console.log("---done---"); - }); - handler.on("exception", function (data) { - console.error("[EXCEPTION] Validator had a massive failure: " + data.message); - }); - handler.on("end-all", function () { - console.log('All done'); - }); - v.validate(options); - } - else if ('/api/metadata' === req.path) { - v = new validator.Specberus; - file = req.query.file; - handler = new Sink; - options = {file: file, events: handler, profile: profileMetadata}; - handler.on("ok", function () { - console.log("OK"); - }); - handler.on("err", function (type, data) { - console.log(data); - }); - handler.on("warning", function (type, data) { - console.log("[W]", data); - }); - handler.on("done", function () { - console.log("---done---"); - }); - handler.on("exception", function (data) { - console.error("[EXCEPTION] Validator had a massive failure: " + data.message); - }); - handler.on("end-all", function () { - console.log('All done'); - console.log(v.detectedProfile); - }); - v.validate(options); - } - else { - res.status(404).send('Don\'t recognise "' + req.path + '"!'); - } - res.end(); -}); +api.setUp(app); // listen up server.listen(process.argv[2] || process.env.PORT || DEFAULT_PORT); diff --git a/lib/api.js b/lib/api.js new file mode 100644 index 000000000..aa78faefe --- /dev/null +++ b/lib/api.js @@ -0,0 +1,137 @@ +/** + * Specberus REST API. + */ + +// Internal packages: +const package = require('../package.json') +, sink = require('./sink') +, validator = require('./validator') +; + +const Sink = sink.Sink +, version = package.version +; + +/** + * Build an "options" object based on an HTTP query string. + * + * @param {Object} query - an HTTP request query. + * @returns {Object} an "options" object that can be used by Specberus. 
+ */ + +const parseSource = function(query) { + var result; + if (query.url) result = {url: query.url}; + else if (query.source) result = {source: query.source}; + else if (query.file) result = {file: query.file}; + else if (query.document) result = {document: query.document}; + return result; +}; + +/** + * Handle an API request: parse method and parameters; handle common errors. + * + * @param {Object} req - HTTP request. + * @param {Object} res - HTTP result. + */ + +const parseRequest = function(req, res) { + + var options + , v + , handler + ; + + if ('/api/version' === req.path) { + res.status(200).send(version); + } + + else if (!req.query) { + res.status(400).send('Missing parameters.'); + } + + else if ('/api/metadata' === req.path) { + options = parseSource(req.query); + if (0 === Object.keys(options).length) { + res.status(400).send('At least one of "url", "source", "file" or "document" must be specified.'); + } + else { + var done = false; + v = new validator.Specberus + handler = new Sink(function(data) { + console.dir(data); + // if (!done) res.status(500).send(data); + // done = true; + }, function(data) { + console.dir(data); + if (!done) res.status(200).send(v.metadata); + done = true; + }) + ; + options.events = handler; + v.extractMetadata(options); + } + } + + else if ('/api/validate' === req.path) { + options = parseSource(req.query); + if (0 === Object.keys(options).length) { + res.status(400).send('At least one of "url", "source", "file" or "document" must be specified.'); + } + else { + v = new validator.Specberus + handler = new Sink(function(data) { + res.status(500).send(data); + }, function(data) { + res.status(200).end(); + }) + ; + options.events = handler; + v.validate(options); + } + } + + else { + res.status(404).send('Wrong API method.'); + } + +}; + +const setUp = function(app) { + + app.post('/api/*', parseRequest); /* function(req, res) { + var v + , file + , profile + , handler + , options + ; + if ('/api/validate' === 
req.path) { + v = new validator.Specberus; + file = req.query.file; + profile = profiles[req.query.profile]; + handler = new Sink(function(data) { + console.log(data); + }, function(data) { + console.log(v.detectedProfile); + }); + options = {file: file, events: handler, profile: profile}; + v.validate(options); + } + else if ('/api/metadata' === req.path) { + v = new validator.Specberus; + file = req.query.file; + handler = new Sink(function(data) { + console.log(data); + }, function(data) { + console.log(v.detectedProfile); + }); + options = {file: file, events: handler, profile: profileMetadata}; + v.validate(options); + } + res.end(); + }); */ + +}; + +exports.setUp = setUp; diff --git a/lib/profiles/metadata.js b/lib/profiles/metadata.js index f64e6d67e..c8bb796e1 100644 --- a/lib/profiles/metadata.js +++ b/lib/profiles/metadata.js @@ -3,4 +3,8 @@ */ exports.name = 'Metadata'; -exports.rules = [require('../rules/metadata')]; + +exports.rules = [ + require('../rules/metadata/profile') +, require('../rules/metadata/deliverers') +]; diff --git a/lib/rules/metadata/deliverers.js b/lib/rules/metadata/deliverers.js new file mode 100644 index 000000000..65b70b069 --- /dev/null +++ b/lib/rules/metadata/deliverers.js @@ -0,0 +1,35 @@ +/** + * Pseudo-rule for metadata extraction: deliverers. 
+ */ + +// Settings: +const REGEX_GROUP = /^.*[^\s]+\s+(interest|community|working)\s+group\s*$/i; + +// Internal packages: +const util = require('../../util'); + +exports.name = 'metadata.deliverers'; + +exports.check = function(sr, done) { + + var result = [] + , found = {} + ; + + sr.$('a').each(function() { + const item = sr.$(this); + // if (item.text().toLowerCase().indexOf('group') > -1) console.log('---' + item.text() + '---'); + if (REGEX_GROUP.test(item.text())) { + const name = item.text().trim() + , url = item.attr('href') + ; + if (!found[util.normaliseURI(url)]) { + found[util.normaliseURI(url)] = true; + result.push({name: name, homepage: url}); + } + } + }); + + done({detectedDeliverers: result}); + +}; diff --git a/lib/rules/metadata.js b/lib/rules/metadata/profile.js similarity index 83% rename from lib/rules/metadata.js rename to lib/rules/metadata/profile.js index 8aec31bfd..9e7d043de 100644 --- a/lib/rules/metadata.js +++ b/lib/rules/metadata/profile.js @@ -1,16 +1,16 @@ /** - * Pseudo-rule for metadata extraction. + * Pseudo-rule for metadata extraction: profile. */ // Settings: const SELECTOR_SUBTITLE = 'body div.head h2'; // Internal packages: -const profiles = require('../../public/data/profiles'); +const profiles = require('../../../public/data/profiles'); -exports.name = 'metadata'; +exports.name = 'metadata.profile'; -exports.check = function (sr, done) { +exports.check = function(sr, done) { var candidate , track diff --git a/lib/sink.js b/lib/sink.js index 89d607c99..3eab7e4f9 100644 --- a/lib/sink.js +++ b/lib/sink.js @@ -7,11 +7,40 @@ const util = require('util') , events = require('events') ; -var Sink = function() { +/** + * Build a generic event handler that can be used by Specberus. + * + * @param {Function} error - function to call in case of exception or error. + * @param {Function} done - function to call at the very end of the process. 
+ */ + +var Sink = function(error, done) { + this.ok = 0; this.errors = []; this.warnings = []; this.done = 0; + + if(error) { + + this.on('exception', function (data) { + error(data); + }); + + this.on('err', function (data) { + error(data); + }); + + } + + if(done) { + + this.on('end-all', function (data) { + done(data); + }); + + } + }; util.inherits(Sink, events.EventEmitter); diff --git a/lib/util.js b/lib/util.js new file mode 100644 index 000000000..e12341c0d --- /dev/null +++ b/lib/util.js @@ -0,0 +1,30 @@ +/** + * Miscellaneous utilities, mostly String-related routines. + */ + +const REGEX_URI = /https?:\/\/(www\.)?((.+)[^\ \/])\/?$/i; + +/** + * Reduce a URI to its minimum expression, for easier comparison. + * + * This works heuristically; it strips a URI of the usual variants and converts it to lowercase + * ("www." at the beginning, "/" at the end) + * + * @param {String} uri - Original URI. + * @returns {String} The "normalised", (probably) equivalent URI. + */ + +const normaliseURI = function(uri) { + + var result = uri.trim().toLowerCase(); + const matches = REGEX_URI.exec(result); + + if (matches && matches.length > 2) { + result = matches[2]; + } + + return result; + +}; + +exports.normaliseURI = normaliseURI; diff --git a/lib/validator.js b/lib/validator.js index 844d3a0da..0db0b654f 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -12,6 +12,7 @@ var whacko = require("whacko") , sua = require("./throttled-ua") , version = require("../package.json").version , Exceptions = require("./exceptions").Exceptions +, profileMetadata = require('./profiles/metadata') ; var Specberus = function () { @@ -30,6 +31,51 @@ Specberus.prototype.clearCache = function () { this.exceptions = new Exceptions(); }; +Specberus.prototype.extractMetadata = function (options) { + + this.clearCache(); + var self = this; + + if (!options.events) return console.error('[EXCEPTION] The events option is required for reporting.'); + self.sink = options.events; + if 
(self.sink.listeners('exception').length === 0) { + console.error("[WARNING] No handler for event `exception` which to report system errors."); + } + + self.config = {lang: 'en_GB'}; + self.metadata = {}; + var seenErrors = {}; + self.sink.on("err", function (name) { seenErrors[name] = true; }); + var doValidation = function (err, query) { + if (err) return self.throw(err); + self.$ = query; + self.sink.emit("start-all", profileMetadata); + var total = (profileMetadata.rules || []).length + , done = 0 + ; + profileMetadata.rules.forEach(function (rule) { + rule.check(self, function (result) { + if (result) { + // console.dir(result); + for (var i in result) { + self.metadata[i] = result[i]; + } + } + done++; + if (!seenErrors[this.name]) self.sink.emit("ok", this.name); + self.sink.emit('metadata', DELIVERER_IDS_KEY, self.delivererIDs); + self.sink.emit("done", this.name); + if (done === total) self.sink.emit("end-all", profileMetadata.name); + }.bind(rule)); + }); + }; + if (options.url) this.loadURL(options.url, doValidation); + else if (options.source) this.loadSource(options.source, doValidation); + else if (options.file) this.loadFile(options.file, doValidation); + else if (options.document) this.loadDocument(options.document, doValidation); + else return this.throw("At least one of url, source, file, or document must be specified."); +}; + Specberus.prototype.validate = function (options) { // accepted options: // - url: URL for a document to load @@ -79,12 +125,7 @@ Specberus.prototype.validate = function (options) { // XXX // I would like to catch all exceptions here, but this derails the testing // infrastructure which also uses exceptions that it expects aren't caught - rule.check(self, function (result) { - if (result) { - for (var i in result) { - self[i] = result[i]; - } - } + rule.check(self, function () { done++; if (!seenErrors[this.name]) self.sink.emit("ok", this.name); self.sink.emit('metadata', DELIVERER_IDS_KEY, self.delivererIDs); diff --git 
a/test/api.js b/test/api.js index ab8b8d0dc..b1913024b 100644 --- a/test/api.js +++ b/test/api.js @@ -1,24 +1,15 @@ /** - * Test the API. + * Test the REST API. */ -// Settings: -const SPECS = [ - {url: 'https://www.w3.org/TR/2016/WD-appmanifest-20160312/', profile: 'WD'} -, {url: 'https://www.w3.org/TR/2016/CR-WebIDL-1-20160308/', profile: 'CR'} -, {url: 'https://www.w3.org/TR/2016/PR-ttml-imsc1-20160308/', profile: 'PR'} -, {url: 'https://www.w3.org/TR/2016/NOTE-csvw-ucr-20160225/', profile: 'WG-NOTE'} -, {url: 'https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/', profile: 'REC'} -, {url: 'https://www.w3.org/TR/2015/WD-tracking-compliance-20150714/', profile: 'LC'} -]; - // External packages: -const expect = require('expect.js'); +const expect = require('expect.js') +, superagent = require('superagent') +; // Internal packages: -const validator = require('../lib/validator') -, sink = require('../lib/sink') -, profileMetadata = require('../lib/profiles/metadata') +const samples = require('./samples') +, package = require('../package') ; /** @@ -29,35 +20,44 @@ const validator = require('../lib/validator') */ const detect = function(url, profile) { - const specberus = new validator.Specberus - , handler = new sink.Sink - ; - handler.on('exception', function () { - }); - handler.on('done', function () { - }); - const opts = {events: handler, profile: profileMetadata, url: url}; - it('should detect a ' + profile, function (done) { - handler.on('end-all', function () { - expect(specberus.detectedProfile).to.equal(profile); - done(); - }); - specberus.validate(opts); + it('Should detect a ' + profile, function () { + // @TODO; submit URL to endpoint and check profiles. }); }; if (!process || !process.env || !process.env.SKIP_NETWORK) { + // @TODO: launch Specberus locally as a server, listening to HTTP requests. 
+ describe('API', function() { + it('The endpoint should exist', function() { + // @TODO + }); + + describe('Method "version"', function() { + it('Should exist'), function() { + // @TODO + }; + it('Should return the right version string'), function() { + // @TODO; query method and compare with "package.version". + }; + }); + describe('Method "metadata"', function() { - for(var i in SPECS) { - detect(SPECS[i].url, SPECS[i].profile); + it('Should exist'), function(done) { + // @TODO + }; + for(var i in samples) { + detect(samples[i].url, samples[i].profile); } }); describe('Method "validate"', function() { - // @TODO + it('Should exist'), function(done) { + // @TODO + }; + // @TODO; submit a few sample specs for validation; check results. }); }); diff --git a/test/rules.js b/test/rules.js index ae66be503..e14ff1c18 100644 --- a/test/rules.js +++ b/test/rules.js @@ -3,7 +3,10 @@ */ // Settings: -const DEBUG = false; +const DEBUG = false +, METADATA_PROFILE = 'profile' +, METADATA_DELIVERERS = 'deliverers' +; // Native packages: const pth = require('path'); @@ -13,10 +16,92 @@ const expect = require('expect.js'); // Internal packages: const validation = require('./validation') +, samples = require('./samples') , validator = require('../lib/validator') , sink = require('../lib/sink') ; +/** + * Assert that the profile detected in a spec is equal to the known profile. + * + * @param {String} url - public URL of a spec. + * @param {String} profile - profile that should be detected. + * @param {Array} deliverers - set of deliverers that should be detected. 
+ */ + +const compareMetadata = function(url, type, expectedValue) { // profile, deliverers) { + + const specberus = new validator.Specberus + , handler = new sink.Sink(console.log) // , console.log) + ; + // handler.on('exception', function () {}); + // handler.on('done', function () {}); + const opts = {events: handler, url: url}; + + if (METADATA_PROFILE === type) { + it('Should detect a ' + expectedValue, function () { + handler.on('end-all', function () { + // console.dir(specberus.metadata); + // expect(specberus.metadata).to.not.be(undefined); + // expect(specberus.metadata.detectedProfile).to.not.be(undefined); + expect(specberus.metadata.detectedProfile).to.equal(expectedValue); + }); + specberus.extractMetadata(opts); + }); + } + else if (METADATA_DELIVERERS === type) { + it('Should find deliverers of sample spec', function () { + handler.on('end-all', function () { + // console.dir(specberus.metadata); + // expect(specberus.metadata).to.not.be(undefined); + // expect(specberus.metadata.detectedDeliverers).to.not.be(undefined); + // expect(specberus.metadata.detectedDeliverers).to.be.an('array'); + expect(specberus.metadata.detectedDeliverers.length).to.equal(expectedValue.length); + // for(var i = 0; i < specberus.metadata.detectedDeliverers.length; i ++) { + // @TODO: compare all deliverers, one by one. 
+ // } + // done(); + }); + specberus.extractMetadata(opts); + }); + + + } + +}; + +describe('Basics', function() { + + const specberus = new validator.Specberus; + + describe('Method "extractMetadata"', function() { + + // it('Should exist and be a function'), function() { + // expect(specberus.extractMetadata).to.be.a('function'); + // }; + + if (!process || !process.env || !process.env.SKIP_NETWORK) { + for(var i in samples) { + compareMetadata(samples[i].url, METADATA_PROFILE, samples[i].profile); + } + } + + if (!process || !process.env || !process.env.SKIP_NETWORK) { + for(var i in samples) { + compareMetadata(samples[i].url, METADATA_DELIVERERS, samples[i].deliverers); + } + } + + }); + + // describe('Method "validate"', function() { + // it('Should exist and be a function'), function() { + // expect(specberus.validate).to.be.a('function'); + // }; + // }); + +}); + var tests = { // Categories dummy: { diff --git a/test/samples.json b/test/samples.json new file mode 100644 index 000000000..1bcc3e1a4 --- /dev/null +++ b/test/samples.json @@ -0,0 +1,62 @@ +[ + { + "url": "https://www.w3.org/TR/2016/WD-appmanifest-20160312/" + , "profile": "WD" + , "deliverers": [ + { + "name": "Web Platform Working Group" + , "homepage": "http://www.w3.org/WebPlatform/WG/" + } + ] + } +, { + "url": "https://www.w3.org/TR/2016/CR-WebIDL-1-20160308/" + , "profile": "CR" + , "deliverers": [ + { + "name": "Web Platform Working Group" + , "homepage": "https://www.w3.org/WebPlatform/WG/" + } + ] + } +, { + "url": "https://www.w3.org/TR/2016/PR-ttml-imsc1-20160308/" + , "profile": "PR" + , "deliverers": [ + { + "name": "Timed Text Working Group" + , "homepage": "http://www.w3.org/AudioVideo/TT/" + } + ] + } +, { + "url": "https://www.w3.org/TR/2016/NOTE-csvw-ucr-20160225/" + , "profile": "WG-NOTE" + , "deliverers": [ + { + "name": "CVS on the Web Working Group" + , "homepage": "http://www.w3.org/2013/csvw/" + } + ] + } +, { + "url": 
"https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/" + , "profile": "REC" + , "deliverers": [ + { + "name": "CVS on the Web Working Group" + , "homepage": "http://www.w3.org/2013/csvw" + } + ] + } +, { + "url": "https://www.w3.org/TR/2015/WD-tracking-compliance-20150714/" + , "profile": "LC" + , "deliverers": [ + { + "name": "Tracking Protection Working Group" + , "homepage": "http://www.w3.org/2011/tracking-protection/" + } + ] + } +] From e69e9fa3fbc1387d8b5351232506c9eb727de6f7 Mon Sep 17 00:00:00 2001 From: tripu Date: Thu, 17 Mar 2016 02:54:47 +0900 Subject: [PATCH 07/23] Extend JSON of samples to test metadata extraction Add local filename and one example of joint-publication. --- test/samples.json | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/samples.json b/test/samples.json index 1bcc3e1a4..e3f8175d5 100644 --- a/test/samples.json +++ b/test/samples.json @@ -1,6 +1,7 @@ [ { "url": "https://www.w3.org/TR/2016/WD-appmanifest-20160312/" + , "file": "appmanifest" , "profile": "WD" , "deliverers": [ { @@ -11,6 +12,7 @@ } , { "url": "https://www.w3.org/TR/2016/CR-WebIDL-1-20160308/" + , "file": "WebIDL-1" , "profile": "CR" , "deliverers": [ { @@ -21,6 +23,7 @@ } , { "url": "https://www.w3.org/TR/2016/PR-ttml-imsc1-20160308/" + , "file": "ttml-imsc1" , "profile": "PR" , "deliverers": [ { @@ -31,6 +34,7 @@ } , { "url": "https://www.w3.org/TR/2016/NOTE-csvw-ucr-20160225/" + , "file": "csvw-ucr" , "profile": "WG-NOTE" , "deliverers": [ { @@ -41,6 +45,7 @@ } , { "url": "https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/" + , "file": "tabular-data-model" , "profile": "REC" , "deliverers": [ { @@ -51,6 +56,7 @@ } , { "url": "https://www.w3.org/TR/2015/WD-tracking-compliance-20150714/" + , "file": "tracking-compliance" , "profile": "LC" , "deliverers": [ { @@ -59,4 +65,19 @@ } ] } +, { + "url": "https://www.w3.org/TR/2016/WD-mediacapture-depth-20160226/" + , "file": "mediacapture-depth" + , "profile": "WD" + , 
"deliverers": [ + { + "name": "Device APIs Working Group" + , "homepage":"http://www.w3.org/2009/dap/" + } + , { + "name": "Web Real-Time Communications Working Group" + , "homepage": "http://www.w3.org/2011/04/webrtc/" + } + ] + } ] From 42470bffe15ad826bcda684d335eb78617857d4f Mon Sep 17 00:00:00 2001 From: tripu Date: Thu, 17 Mar 2016 02:55:40 +0900 Subject: [PATCH 08/23] Include all 7 metadata samples (for offline tests) --- test/docs/metadata/WebIDL-1.html | 13861 ++++++++++++++++++ test/docs/metadata/appmanifest.html | 3475 +++++ test/docs/metadata/csvw-ucr.html | 4439 ++++++ test/docs/metadata/mediacapture-depth.html | 1460 ++ test/docs/metadata/tabular-data-model.html | 3049 ++++ test/docs/metadata/tracking-compliance.html | 1116 ++ test/docs/metadata/ttml-imsc1.html | 4075 +++++ 7 files changed, 31475 insertions(+) create mode 100644 test/docs/metadata/WebIDL-1.html create mode 100644 test/docs/metadata/appmanifest.html create mode 100644 test/docs/metadata/csvw-ucr.html create mode 100644 test/docs/metadata/mediacapture-depth.html create mode 100644 test/docs/metadata/tabular-data-model.html create mode 100644 test/docs/metadata/tracking-compliance.html create mode 100644 test/docs/metadata/ttml-imsc1.html diff --git a/test/docs/metadata/WebIDL-1.html b/test/docs/metadata/WebIDL-1.html new file mode 100644 index 000000000..4c4d952ce --- /dev/null +++ b/test/docs/metadata/WebIDL-1.html @@ -0,0 +1,13861 @@ + + + + + WebIDL Level 1 + + + + + + + + +

Jump to Table of Contents Collapse Sidebar

+ + + + +

Abstract

+ +

+ This document defines an interface definition language, Web IDL, + that can be used to describe interfaces that are intended to be + implemented in web browsers. Web IDL is an IDL variant with a + number of features that allow the behavior of common script objects in + the web platform to be specified more readily. How interfaces + described with Web IDL correspond to constructs within ECMAScript + execution environments is also detailed in this document. + It is expected that this document acts + as a guide to implementors of already-published specifications, + and that newly published specifications reference this + document to ensure conforming implementations of interfaces + are interoperable. +

+ + +

Status of This Document

+

+ This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the W3C technical reports index at http://www.w3.org/TR/. +

+ + + +

+ This is the "Level 1" Version of WebIDL; it contains parts of the main Editor's copy [WEBIDL] that + are considered stable, implemented and tested. The intent of this document is to provide a stable reference for this subset. + Implementors should defer to the Editor's copy [WEBIDL] only, as it may contain updated algorithms and definitions; + this specification is suitable for reference by other specification authors in so far as it wholly contains the syntax definitions used in the citing document. + Note that this specification will be updated to match changes in the editor's copy until it reaches Recommendation. New syntax definitions will be added in the next Level of WebIDL. +

+

+ A public test suite is in development. Due to its nature, it should be integrated in the Test Suites for Web Platform specifications not as a WebIDL test suite, but as chunks relative to each specification tested. The entrance criteria for this document is to have at least two independent +implementations passing each test. The Working Group will prepare an implementation report to track progress. +

+

+ This document is produced by the + Web Platform Working Group + in the W3C Interaction Domain. + Changes made to this document can be found in the specification’s + commit log on GitHub: + recent changes, + older changes. +

+

+ + There is a bug tracker + for the specification. +

+ + +

+ This document was published by the Web Platform Working Group as a Candidate Recommendation. + This document is intended to become a W3C Recommendation. + If you wish to make comments regarding this document, please send them to + public-script-coord@w3.org + (subscribe, + archives). + + W3C publishes a Candidate Recommendation to indicate that the document is believed to be + stable and to encourage implementation by the developer community. This Candidate + Recommendation is expected to advance to Proposed Recommendation no earlier than + 23 May 2016. + All comments are welcome. +

+

+ Please see the Working Group's implementation + report. +

+

+ Publication as a Candidate Recommendation does not imply endorsement by the W3C + Membership. This is a draft document and may be updated, replaced or obsoleted by other + documents at any time. It is inappropriate to cite this document as other than work in + progress. +

+

+ This document was produced by + a group + operating under the + 5 February 2004 W3C Patent + Policy. + W3C maintains a public list of any patent + disclosures + made in connection with the deliverables of + the group; that page also includes + instructions for disclosing a patent. An individual who has actual knowledge of a patent + which the individual believes contains + Essential + Claim(s) must disclose the information in accordance with + section + 6 of the W3C Patent Policy. +

+

This document is governed by the 1 September 2015 W3C Process Document. +

+ +
+ + + + + + +
+

1. Introduction

+ +

This section is informative.

+ +

+ Technical reports published by the W3C that include programming + language interfaces have typically been described using the + Object Management Group’s Interface Definition Language (IDL) + [OMGIDL]. The IDL provides a means to + describe these interfaces in a language independent manner. Usually, + additional language binding appendices are included in such + documents which detail how the interfaces described with the IDL + correspond to constructs in the given language. +

+

+ However, the bindings in these specifications for the language most + commonly used on the web, ECMAScript, are consistently specified with + low enough precision as to result in interoperability issues. In + addition, each specification must describe the same basic information, + such as DOM interfaces described in IDL corresponding to properties + on the ECMAScript global object, or the unsigned + long IDL type mapping to the Number + type in ECMAScript. +

+

+ This specification defines an IDL language similar to OMG IDL + for use by specifications that define interfaces for Web APIs. A number of extensions are + given to the IDL to support common functionality that previously must + have been written in prose. In addition, precise language bindings + for ECMAScript Edition 6 are given. +

+ +
+

1.1 Typographic conventions

+ +

+ The following typographic conventions are used in this document: +

+
    +
  • Defining instances of terms: example term
  • +
  • Links to terms defined in this document: example term
  • +
  • Links to terms defined in other documents: example term
  • +
  • Grammar symbols: ExampleGrammarSymbol
  • +
  • IDL and ECMAScript types: ExampleType
  • +
  • Code snippets: a = b + obj.f()
  • +
  • Unicode characters: U+0030 DIGIT ZERO ("0")
  • +
  • Extended attributes: [ExampleExtendedAttribute]
  • +
  • Variable names in prose and algorithms: exampleVariableName.
  • +
  • IDL informal syntax examples: +
    interface identifier {
    +  interface-members…
    +};
    + (Red text is used to highlight specific parts of the syntax discussed in surrounding prose.)
  • +
  • IDL grammar snippets: + + +
    [5]ExampleGrammarSymbolOtherSymbol "sometoken"
     | AnotherSymbol
     | ε  // nothing
    + (Each grammar rule is assigned a number for reference, shown on the left.)
  • +
  • Non-normative notes:
    Note

    This is a note.

  • +
  • Non-normative examples:
    Example

    This is an example.

  • +
  • Normative warnings:
    Warning

    This is a warning.

  • +
  • Code blocks:
    IDL
    // This is an IDL code block.
    +interface Example {
    +  attribute long something;
    +};
    +
    ECMAScript
    // This is an ECMAScript code block.
    +window.onload = function() { window.alert("loaded"); };
  • +
+
+
+ +

2. Conformance

+

+ As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, + and notes in this specification are non-normative. Everything else in this specification is + normative. +

+

The key words MAY, MUST, MUST NOT, REQUIRED, SHALL, SHOULD, and SHOULD NOT are + to be interpreted as described in [RFC2119]. +

+ + + + + +

+ The following conformance classes are defined by this specification: +

+
+
conforming set of IDL fragments
+
+

+ A set of IDL fragments is considered + to be a conforming + set of IDL fragments if, taken together, they satisfy all of the + MUST-, + REQUIRED- and SHALL-level + criteria in this specification that apply to IDL fragments. +

+
+
conforming implementation
+
+

+ A user agent is considered to be a + conforming implementation + relative to a conforming + set of IDL fragments if it satisfies all of the MUST-, + REQUIRED- and SHALL-level + criteria in this specification that apply to implementations for all language + bindings that the user agent supports. +

+
+
conforming ECMAScript implementation
+
+

+ A user agent is considered to be a + conforming ECMAScript implementation + relative to a conforming + set of IDL fragments if it satisfies all of the MUST-, + REQUIRED- and SHALL-level + criteria in this specification that apply to implementations for the ECMAScript + language binding. +

+
+
+
+ +
+

3. Interface definition language

+ +

+ This section describes a language, Web IDL, which can be used to define + interfaces for APIs in the Web platform. A specification that defines Web APIs + can include one or more IDL fragments that + describe the interfaces (the state and behavior that objects can exhibit) + for the APIs defined by that specification. + An IDL fragment is + a sequence of definitions that matches the Definitions grammar symbol. + The set of IDL fragments that + an implementation supports is not ordered. + See Appendix A. for the complete grammar and an explanation of the notation used. +

+ +

+ The different kinds of definitions that can appear in an + IDL fragment are: + interfaces, + partial interface definitions, + dictionaries, + partial dictionary definitions, + typedefs and + implements statements. + These are all defined in the following sections. +

+ +

+ Each definition + (matching Definition) + can be preceded by a list of extended attributes (matching + ExtendedAttributeList), + which can control how the definition will be handled in language bindings. + The extended attributes defined by this specification that are language binding + agnostic are discussed in section 3.11 , + while those specific to the ECMAScript language binding are discussed + in section 4.3 . +

+ +
[extended-attributes]
+interface identifier {
+  interface-members…
+};
+ +
[1]DefinitionsExtendedAttributeList Definition Definitions
 | + ε
[2]DefinitionCallbackOrInterface
 | + Partial
 | + Dictionary
 | + Enum
 | + Typedef
 | + ImplementsStatement
[3]CallbackOrInterface"callback" CallbackRestOrInterface
 | + Interface
+ +
Example
+

+ The following is an example of an IDL fragment. +

+
IDL
interface Paint { };
+
+interface SolidColor : Paint {
+  attribute double red;
+  attribute double green;
+  attribute double blue;
+};
+
+interface Pattern : Paint {
+  attribute DOMString imageURL;
+};
+
+[Constructor]
+interface GraphicalWindow {
+  readonly attribute unsigned long width;
+  readonly attribute unsigned long height;
+
+  attribute Paint currentPaint;
+
+  void drawRectangle(double x, double y, double width, double height);
+
+  void drawText(double x, double y, DOMString text);
+};
+

+ Here, four interfaces + are being defined. + The GraphicalWindow interface has two + read only attributes, + one writable attribute, and two operations + defined on it. Objects that implement the GraphicalWindow interface + will expose these attributes and operations in a manner appropriate to the + particular language being used. +

+

+ In ECMAScript, the attributes on the IDL interfaces will be exposed as accessor + properties and the operations as Function-valued + data properties on a prototype object for all GraphicalWindow + objects; each ECMAScript object that implements GraphicalWindow + will have that prototype object in its prototype chain. +

+ +

+ The [Constructor] that appears on GraphicalWindow + is an extended attribute. + This extended attribute causes a constructor to exist in ECMAScript implementations, + so that calling new GraphicalWindow() would return a new object + that implemented the interface. +

+
+ +
+

3.1 Names

+ +

+ Every interface, + partial interface definition, + dictionary, + partial dictionary definition, + enumeration, + callback function and + typedef (together called named definitions) + and every constant, + attribute, + and dictionary member has an + identifier, as do some + operations. + The identifier is determined by an + identifier token somewhere + in the declaration: +

+
    +
  • + For named definitions, + the identifier token that appears + directly after the interface, + dictionary, enum + or callback keyword + determines the identifier of that definition. +
    interface interface-identifier { interface-members… };
    +partial interface interface-identifier { interface-members… };
    +dictionary dictionary-identifier { dictionary-members… };
    +partial dictionary dictionary-identifier { dictionary-members… };
    +enum enumeration-identifier { enumeration-values… };
    +callback callback-identifier = callback-signature;
    +
  • +
  • + For attributes, + typedefs + and dictionary members, + the final identifier token before the + semicolon at the end of the declaration determines the identifier. +
    interface identifier {
    +  attribute type attribute-identifier;
    +};
    +
    +typedef type typedef-identifier;
    +
    +dictionary identifier {
    +  type dictionary-member-identifier;
    +};
    +
  • +
  • + For constants, + the identifier token before the + equals sign determines the identifier. +
    const type constant-identifier = value;
    +
  • +
  • + For operations, the + identifier token that appears + after the return type but before the opening parenthesis (that is, + one that is matched as part of the OptionalIdentifier + grammar symbol in an OperationRest) determines the identifier of the operation. If + there is no such identifier token, + then the operation does not have an identifier. +
    return-type operation-identifier(arguments…);
    +
  • +
+
Note
+

+ Operations can have no identifier when they are being used to declare a + special kind of operation, such as a getter or setter. +

+
+

+ For all of these constructs, the identifier + is the value of the identifier token with any leading + U+005F LOW LINE ("_") character (underscore) removed. +

+
Note
+

+ A leading "_" is used to escape an identifier from looking + like a reserved word so that, for example, an interface named “interface” can be + defined. The leading "_" is dropped to unescape the + identifier. +

+
+

+ Operation arguments can take a slightly wider set of identifiers. In an operation + declaration, the identifier of an argument is specified immediately after its + type and is given by either an identifier + token or by one of the keywords that match the ArgumentNameKeyword + symbol. If one of these keywords is used, it need not be escaped with a leading + underscore. +

+
return-type operation-identifier(argument-type argument-identifier, …);
+
[71]ArgumentNameKeyword + "attribute"
 | + "callback"
 | + "const"
 | + "deleter"
 | + "dictionary" +
 | + "enum"
 | + "getter"
 | + "implements"
 | + "inherit"
 | + "interface"
 | + "iterable" +
 | + "legacycaller"
 | + "partial"
 | + "required"
 | + "serializer"
 | + "setter"
 | + "static"
 | + "stringifier"
 | + "typedef" +
 | + "unrestricted" +
+

+ If an identifier token is used, then the + identifier of the operation argument + is the value of that token with any leading + U+005F LOW LINE ("_") character (underscore) removed. + If instead one of the ArgumentNameKeyword + keyword tokens is used, then the identifier of the operation argument + is simply that token. +

+

+ The identifier of any of the abovementioned + IDL constructs MUST NOT be “constructor”, + “toString”, “toJSON”, + or begin with a U+005F LOW LINE ("_") character. These + are known as reserved identifiers. +

+
Note
+

Further restrictions on identifier names for particular constructs may be made + in later sections.

+
+

+ Within the set of IDL fragments + that a given implementation supports, + the identifier of every + interface, + dictionary, + enumeration, + callback function and + typedef + MUST NOT + be the same as the identifier of any other + interface, + dictionary, + enumeration, + callback function or + typedef. +

+

+ Within an IDL fragment, a reference + to a definition need not appear after + the declaration of the referenced definition. References can also be made + across IDL fragments. +

+
Example
+

Therefore, the following IDL fragment is valid:

+
IDL
interface B : A {
+  void f(SequenceOfLongs x);
+};
+
+interface A {
+};
+
+typedef sequence<long> SequenceOfLongs;
+
+ +
Example
+

+ The following IDL fragment + demonstrates how identifiers + are given to definitions and interface members. +

+
IDL
// Typedef identifier: "number"
+typedef double number;
+
+// Interface identifier: "System"
+interface System {
+
+  // Operation identifier:          "createObject"
+  // Operation argument identifier: "interface"
+  object createObject(DOMString _interface);
+
+  // Operation argument identifier: "interface"
+  sequence<object> getObjects(DOMString interface);
+
+  // Operation has no identifier; it declares a getter.
+  getter DOMString (DOMString keyName);
+};
+
+// Interface identifier: "TextField"
+interface TextField {
+
+  // Attribute identifier: "const"
+  attribute boolean _const;
+
+  // Attribute identifier: "value"
+  attribute DOMString? _value;
+};
+

+ Note that while the second attribute + on the TextField interface + need not have been escaped with an underscore (because “value” is + not a keyword in the IDL grammar), it is still unescaped + to obtain the attribute’s identifier. +

+
+
+ +
+

3.2 Interfaces

+ +

+ IDL fragments are used to + describe object oriented systems. In such systems, objects are entities + that have identity and which are encapsulations of state and behavior. + An interface is a definition (matching + Interface or + "callback" Interface) that declares some + state and behavior that an object implementing that interface will expose. +

+
interface identifier {
+  interface-members…
+};
+

+ An interface is a specification of a set of + interface members + (matching InterfaceMembers), + which are the constants, + attributes, + operations and + other declarations that appear between the braces in the interface declaration. + Attributes describe the state that an object + implementing the interface will expose, and operations describe the + behaviors that can be invoked on the object. Constants declare + named constant values that are exposed as a convenience to users + of objects in the system. +

+

+ Interfaces in Web IDL describe how objects that implement the + interface behave. In bindings for object oriented languages, it is + expected that an object that implements a particular IDL interface + provides ways to inspect and modify the object's state and to + invoke the behavior described by the interface. +

+ +

+ An interface can be defined to inherit from another interface. + If the identifier of the interface is followed by a + U+003A COLON (":") character + and an identifier, + then that identifier identifies the inherited interface. + An object that implements an interface that inherits from another + also implements that inherited interface. The object therefore will also + have members that correspond to the interface members from the inherited interface. +

+
interface identifier : identifier-of-inherited-interface {
+  interface-members…
+};
+

+ The order that members appear in has no significance except in the + case of overloading. +

+

+ Interfaces may specify an interface member that has the same name as + one from an inherited interface. Objects that implement the derived + interface will expose the member on the derived interface. It is + language binding specific whether the overridden member can be + accessed on the object. +

+
Example
+

+ Consider the following two interfaces. +

+
IDL
interface A {
+  void f();
+  void g();
+};
+
+interface B : A {
+  void f();
+  void g(DOMString x);
+};
+

+ In the ECMAScript language binding, an instance of B + will have a prototype chain that looks like the following: +

+
  [Object.prototype: the Object prototype object]
+       ↑
+  [A.prototype: interface prototype object for A]
+       ↑
+  [B.prototype: interface prototype object for B]
+       ↑
+  [instanceOfB]
+

+ Calling instanceOfB.f() in ECMAScript will invoke the f defined + on B. However, the f from A + can still be invoked on an object that implements B by + calling A.prototype.f.call(instanceOfB). +

+ +
+

+ The inherited interfaces of + a given interface A is the set of all interfaces that A + inherits from, directly or indirectly. If A does not inherit + from another interface, then the set is empty. Otherwise, the set + includes the interface B that A inherits + from and all of B’s inherited interfaces. +

+

+ An interface MUST NOT be declared such that + its inheritance hierarchy has a cycle. That is, an interface + A cannot inherit from itself, nor can it inherit from another + interface B that inherits from A, and so on. +

+

+ Note that general multiple inheritance of interfaces is not supported, and + objects also cannot implement arbitrary sets of interfaces. + Objects can be defined to implement a single given interface A, + which means that it also implements all of A’s + inherited interfaces. In addition, + an implements statement can be + used to define that objects implementing an interface will always + also implement another interface. +

+

+ Each interface member can be preceded by a list of extended attributes (matching + ExtendedAttributeList), + which can control how the interface member will be handled in language bindings. +

+
interface identifier {
+
+  [extended-attributes]
+  const type identifier = value;
+
+  [extended-attributes]
+  attribute type identifier;
+
+  [extended-attributes]
+  return-type identifier(arguments…);
+};
+ +

+ A callback interface is + an interface + that uses the callback keyword at the start of + its definition. Callback interfaces are ones that can be + implemented by user objects + and not by platform objects, + as described in section 3.9 + . +

+
callback interface identifier {
+  interface-members…
+};
+
Note
+

See also the similarly named callback function definition.

+
+

+ Callback interfaces + MUST NOT inherit + from any non-callback interfaces, and non-callback interfaces MUST NOT + inherit from any callback interfaces. + Callback interfaces MUST NOT have any + consequential interfaces. +

+

+ Static attributes and + static operations MUST NOT + be defined on a callback interface. +

+
Warning
+

+ Specification authors SHOULD NOT define + callback interfaces + that have only a single operation, + unless required to describe the requirements of existing APIs. + Instead, a callback function SHOULD be used. +

+

+ The definition of EventListener as a + callback interface + is an example of an existing API that needs to allow + user objects with a + given property (in this case “handleEvent”) to be considered to implement the interface. + For new APIs, and those for which there are no compatibility concerns, + using a callback function will allow + only a Function object (in the ECMAScript + language binding). +

+
+ + + +
Note
+

+ Specification authors wanting to define APIs that take ECMAScript objects + as “property bag” like function arguments are suggested to use + dictionary types rather than + callback interfaces. +

+

+ For example, instead of this: +

+
IDL
callback interface Options {
+  attribute DOMString? option1;
+  attribute DOMString? option2;
+  attribute long? option3;
+};
+
+interface A {
+  void doTask(DOMString type, Options options);
+};
+

+ to be used like this: +

+
ECMAScript
var a = getA();  // Get an instance of A.
+
+a.doTask("something", { option1: "banana", option3: 100 });
+

+ instead write the following: +

+
IDL
dictionary Options {
+  DOMString? option1;
+  DOMString? option2;
+  long? option3;
+};
+
+interface A {
+  void doTask(DOMString type, Options options);
+};
+
+ +

+ The IDL for interfaces can be split into multiple parts by using + partial interface definitions + (matching "partial" PartialInterface). + The identifier of a partial + interface definition MUST be the same + as the identifier of an interface definition. All of + the members that appear on each of the partial interfaces are considered to be + members of the interface itself. +

+
interface SomeInterface {
+  interface-members…
+};
+
+partial interface SomeInterface {
+  interface-members…
+};
+
Note
+

Partial interface definitions are intended for use as a specification + editorial aid, allowing the definition of an interface to be separated + over more than one section of the document, and sometimes multiple documents.

+
+

+ The order of appearance of an interface + definition and any of its partial interface + definitions does not matter. +

+
Note
+

A partial interface definition cannot specify that the interface + inherits from another interface. + Inheritance must be specified on the original interface + definition.

+
+

+ Extended attributes can be specified on + partial interface definitions, with some + limitations. The following extended attributes MUST NOT + be specified on partial interface definitions: + [Constructor], + + [NamedConstructor], + [NoInterfaceObject]. +

+
Note
+

The above list of extended attributes + is all of those defined in this document that are applicable to + interfaces except for + [Exposed], + [Global], + [OverrideBuiltins], + [PrimaryGlobal] and + [Unforgeable].

+
+

+ Any extended attribute specified + on a partial interface definition + is considered to appear on the interface + itself. +

+

+ The relevant language binding determines how interfaces correspond to constructs + in the language. +

+ + +

+ The following extended attributes are applicable to interfaces: + [Constructor], + [Exposed], + [Global], + + [NamedConstructor], + [NoInterfaceObject], + [OverrideBuiltins], + [PrimaryGlobal], + [Unforgeable]. +

+ +
[3]CallbackOrInterface"callback" CallbackRestOrInterface
 | + Interface
[4]CallbackRestOrInterfaceCallbackRest
 | + Interface
[5]Interface"interface" identifier Inheritance "{" InterfaceMembers "}" ";"
[6]Partial"partial" PartialDefinition
[7]PartialDefinitionPartialInterface
 | + PartialDictionary
[8]PartialInterface"interface" identifier "{" InterfaceMembers "}" ";"
[9]InterfaceMembersExtendedAttributeList InterfaceMember InterfaceMembers
 | + ε
[10]InterfaceMemberConst
 | + Operation
 | + Serializer
 | + Stringifier
 | + StaticMember
 | + Iterable
 | + ReadOnlyMember
 | + ReadWriteAttribute
[18]Inheritance":" identifier
 | + ε
+ +
Example
+ +

+ The following IDL fragment + demonstrates the definition of two mutually referential interfaces. + Both Human and Dog + inherit from Animal. Objects that implement + either of those two interfaces will thus have a name attribute. +

+
IDL
interface Animal {
+  attribute DOMString name;
+};
+
+interface Human : Animal {
+  attribute Dog? pet;
+};
+
+interface Dog : Animal {
+  attribute Human? owner;
+};
+
+ +
Example
+

+ The following IDL fragment defines + simplified versions of a few DOM interfaces, one of which + is a callback interface. +

+
IDL
interface Node {
+  readonly attribute DOMString nodeName;
+  readonly attribute Node? parentNode;
+  Node appendChild(Node newChild);
+  void addEventListener(DOMString type, EventListener listener);
+};
+
+callback interface EventListener {
+  void handleEvent(Event event);
+};
+

+ Since the EventListener interface is annotated + callback interface, user objects + can implement it: +

+
ECMAScript
var node = getNode();                                // Obtain an instance of Node.
+
+var listener = {
+  handleEvent: function(event) {
+    ...
+  }
+};
+node.addEventListener("click", listener);            // This works.
+
+node.addEventListener("click", function() { ... });  // As does this.
+

+ It is not possible for a user object to implement Node, however: +

+
ECMAScript
var node = getNode();  // Obtain an instance of Node.
+
+var newNode = {
+  nodeName: "span",
+  parentNode: null,
+  appendChild: function(newchild) {
+    ...
+  },
+  addEventListener: function(type, listener) {
+    ...
+  }
+};
+node.appendChild(newNode);  // This will throw a TypeError exception.
+
+ +
+

3.2.1 Constants

+ +

+ A constant is a declaration (matching + Const) used to bind a constant value to a name. + Constants can appear on interfaces. +

+
Warning
+

+ Constants have in the past primarily been used to define + named integer codes in the style of an enumeration. The Web platform + is moving away from this design pattern in favor of the use of strings. + Specification authors who wish to define constants are strongly advised to discuss + this on the public-script-coord@w3.org + mailing list before proceeding. +

+
+
const type identifier = value;
+

+ The identifier of a + constant + MUST NOT be the same as the identifier + of another interface member + defined on the same interface. + The identifier also MUST NOT + be “length”, “name” or “prototype”. +

+
Note
+

+ These three names are the names of properties that exist on all + Function objects. +

+
+

+ The type of a constant (matching ConstType) + MUST NOT be any type other than + a primitive type + or a nullable primitive type. + If an identifier is used, + it MUST reference a typedef + whose type is a primitive type or a nullable primitive type. +

+

+ The ConstValue part of a + constant declaration gives the value of the constant, which can be + one of the two boolean literal tokens (true + and false), + the null token, an + integer token, + a float token, + or one of the three special floating point constant values + (-Infinity, Infinity and NaN). +

+
Note
+

+ These values – in addition to strings and the empty sequence – can also be used to specify the + default value + of a dictionary member or of + an optional argument. Note that strings and the + empty sequence [] cannot be used as the value of a + constant. +

+
+

+ The value of the boolean literal tokens true and + false are the IDL boolean values + true and false. +

+

+ The value of an integer token is an integer + whose value is determined as follows: +

+
    +
  1. Let S be the sequence of characters matched by the integer token.
  2. +
  3. Let sign be −1 if S begins with U+002D HYPHEN-MINUS ("-"), and 1 otherwise.
  4. +
  5. Let base be the base of the number based on the characters that follow the optional leading U+002D HYPHEN-MINUS ("-") character: +
    +
    U+0030 DIGIT ZERO ("0"), U+0058 LATIN CAPITAL LETTER X ("X")
    +
    U+0030 DIGIT ZERO ("0"), U+0078 LATIN SMALL LETTER X ("x")
    +
    The base is 16.
    +
    U+0030 DIGIT ZERO ("0")
    +
    The base is 8.
    +
    Otherwise
    +
    The base is 10.
    +
    +
  6. +
  7. Let number be the result of interpreting all remaining characters following the optional leading U+002D HYPHEN-MINUS ("-") + character and any characters indicating the base as an integer specified in base base.
  8. +
  9. Return sign × number.
  10. +
+

+ The type of an integer token is the same + as the type of the constant, dictionary member or optional argument it is being used as the value of. + The value of the integer token MUST NOT + lie outside the valid range of values for its type, as given in + section 3.10 . +

+

+ The value of a float token is + either an IEEE 754 single-precision floating point number or an IEEE 754 + double-precision floating point number, depending on the type of the + constant, dictionary member or optional argument it is being used as the value for, determined as follows: +

+
    +
  1. Let S be the sequence of characters matched by the float token.
  2. +
  3. Let result be the Mathematical Value that would be obtained if S were + parsed as an ECMAScript NumericLiteral ( + [ECMA-262] + , section 11.8.3).
  4. +
  5. + If the float token is being + used as the value for a float or + unrestricted float, then + the value of the float token + is the IEEE 754 single-precision floating point number closest to + result. Otherwise, the float token is being + used as the value for a double or + unrestricted double, and + the value of the float token + is the IEEE 754 double-precision floating point number closest to + result. + [IEEE-754] +
  6. +
+

+ The value of a constant value specified as + Infinity, -Infinity or NaN is either + an IEEE 754 single-precision floating point number or an IEEE 754 + double-precision floating point number, depending on the type of the + constant, dictionary member or optional argument it is being used as the + value for: +

+
+
Type unrestricted float, constant value Infinity
+
The value is the IEEE 754 single-precision positive infinity value.
+
Type unrestricted double, constant value Infinity
+
The value is the IEEE 754 double-precision positive infinity value.
+
Type unrestricted float, constant value -Infinity
+
The value is the IEEE 754 single-precision negative infinity value.
+
Type unrestricted double, constant value -Infinity
+
The value is the IEEE 754 double-precision negative infinity value.
+
Type unrestricted float, constant value NaN
+
The value is the IEEE 754 single-precision NaN value with the bit pattern 0x7fc00000.
+
Type unrestricted double, constant value NaN
+
The value is the IEEE 754 double-precision NaN value with the bit pattern 0x7ff8000000000000.
+
+

+ The type of a float token is the same + as the type of the constant, dictionary member or optional argument it is being used as the value of. The value of the + float token MUST NOT + lie outside the valid range of values for its type, as given in + section 3.10 . + Also, Infinity, -Infinity and NaN MUST NOT + be used as the value of a float + or double. +

+

+ The value of the null token is the special + null value that is a member of the + nullable types. The type of + the null token is the same as the + type of the constant, dictionary member or optional argument it is being used as the value of. +

+

+ If VT is the type of the value assigned to a constant, and DT + is the type of the constant, dictionary member or optional argument itself, then these types MUST + be compatible, which is the case if DT and VT are identical, + or DT is a nullable type + whose inner type is VT. +

+

+ Constants are not associated with + particular instances of the interface + on which they appear. It is language binding specific whether + constants are exposed on instances. +

+
Note
+

+ + The ECMAScript language binding does however + allow constants to be accessed + through objects implementing the IDL interfaces + on which the constants are declared. + For example, with the following IDL: +

+
IDL
interface A {
+  const short rambaldi = 47;
+};
+

+ the constant value can be accessed in ECMAScript either as + A.rambaldi or instanceOfA.rambaldi. +

+
+

+ The following extended attributes are applicable to constants: + [Exposed]. +

+ +
[26]Const"const" ConstType identifier "=" ConstValue ";"
[27]ConstValueBooleanLiteral
 | + FloatLiteral
 | + integer
 | + "null"
[28]BooleanLiteral"true"
 | + "false"
[29]FloatLiteralfloat
 | + "-Infinity"
 | + "Infinity"
 | + "NaN"
[80]ConstTypePrimitiveType Null
 | + identifier Null
+
Example
+

+ The following IDL fragment + demonstrates how constants + of the above types can be defined. +

+
IDL
interface Util {
+  const boolean DEBUG = false;
+  const octet LF = 10;
+  const unsigned long BIT_MASK = 0x0000fc00;
+  const double AVOGADRO = 6.022e23;
+};
+
+
+ +
+

3.2.2 Attributes

+ +

+ An attribute is an interface member + (matching "static" AttributeRest, + "stringifier" AttributeRest, + or Attribute) + that is used to declare data fields with a given type and + identifier whose value can + be retrieved and (in some cases) changed. There are two kinds of attributes: +

+
    +
  1. regular attributes, which are those + used to declare that objects implementing the interface + will have a data field member with the given identifier +
    attribute type identifier;
  2. +
  3. static attributes, which are used + to declare attributes that are not associated with a particular object implementing the interface +
    static attribute type identifier;
  4. +
+

+ If an attribute has no static keyword, then it declares a + regular attribute. Otherwise, + it declares a static attribute. +

+

+ The identifier of an + attribute + MUST NOT be the same as the identifier + of another interface member + defined on the same interface. + The identifier of a static attribute MUST NOT + be “prototype”. +

+

+ The type of the attribute is given by the type (matching Type) + that appears after the attribute keyword. + If the Type is an + identifier or an identifier followed by ?, + then the identifier MUST + identify an interface, enumeration, + callback function or typedef. +

+

+ The type of the attribute, after resolving typedefs, MUST NOT be a + nullable or non-nullable version of any of the following types: +

+ +

+ The attribute is read only if the + readonly keyword is used before the attribute keyword. + An object that implements the interface on which a read only attribute + is defined will not allow assignment to that attribute. It is language + binding specific whether assignment is simply disallowed by the language, + ignored or an exception is thrown. +

+
readonly attribute type identifier;
+

+ A regular attribute + that is not read only + can be declared to inherit its getter + from an ancestor interface. This can be used to make a read only attribute + in an ancestor interface be writable on a derived interface. An attribute + inherits its getter if + its declaration includes inherit in the declaration. + The read only attribute from which the attribute inherits its getter + is the attribute with the same identifier on the closest ancestor interface + of the one on which the inheriting attribute is defined. The attribute + whose getter is being inherited MUST be + of the same type as the inheriting attribute, and inherit + MUST NOT appear on a read only + attribute or a static attribute. +

+
interface Ancestor {
+  readonly attribute TheType theIdentifier;
+};
+
+interface Derived : Ancestor {
+  inherit attribute TheType theIdentifier;
+};
+ +

+ When the stringifier keyword is used + in a regular attribute + declaration, it indicates that objects implementing the + interface will be stringified to the value of the attribute. See + section 3.2.4.2 + for details. +

+
stringifier attribute DOMString identifier;
+

+ If an implementation attempts to get or set the value of an + attribute on a + user object + (for example, when a callback object has been supplied to the implementation), + and that attempt results in an exception being thrown, then, unless otherwise specified, that + exception will be propagated to the user code that caused the + implementation to access the attribute. Similarly, if a value + returned from getting the attribute cannot be converted to + an IDL type, then any exception resulting from this will also + be propagated to the user code that resulted in the implementation + attempting to get the value of the attribute. +

+ +

+ The following extended attributes + are applicable to regular and static attributes: + [Clamp], + [EnforceRange], + [Exposed], + [SameObject], + [TreatNullAs]. +

+ +

+ The following extended attributes + are applicable only to regular attributes: + [LenientThis], + [PutForwards], + [Replaceable], + [Unforgeable]. +

+ +
[39]ReadOnlyMember"readonly" ReadOnlyMemberRest
[40]ReadOnlyMemberRestAttributeRest
[41]ReadWriteAttribute"inherit" ReadOnly AttributeRest
 | + AttributeRest
[42]AttributeRest"attribute" Type AttributeName ";"
[43]AttributeNameAttributeNameKeyword
 | + identifier
[44]AttributeNameKeyword"required"
[45]Inherit"inherit"
 | + ε
[46]ReadOnly"readonly"
 | + ε
+ +
Example
+

+ The following IDL fragment + demonstrates how attributes + can be declared on an interface: +

+
IDL
interface Animal {
+
+  // A simple attribute that can be set to any string value.
+  readonly attribute DOMString name;
+
+  // An attribute whose value can be assigned to.
+  attribute unsigned short age;
+};
+
+interface Person : Animal {
+
+  // An attribute whose getter behavior is inherited from Animal, and need not be
+  // specified in the description of Person.
+  inherit attribute DOMString name;
+};
+
+
+ +
+

3.2.3 Operations

+ +

+ An operation is an interface member + (matching "static" OperationRest, + "stringifier" OperationRest, + "serializer" OperationRest, + ReturnType OperationRest or + SpecialOperation) + that defines a behavior that can be invoked on objects implementing the interface. + There are three kinds of operation: +

+
    +
  1. regular operations, which + are those used to declare that objects implementing the + interface will have a method with + the given identifier +
    return-type identifier(arguments…);
  2. +
  3. special operations, + which are used to declare special behavior on objects + implementing the interface, such as object indexing and stringification +
    special-keywords… return-type identifier(arguments…);
    +special-keywords… return-type (arguments…);
  4. +
  5. static operations, + which are used to declare operations that are not associated with + a particular object implementing the interface +
    static return-type identifier(arguments…);
  6. +
+

+ If an operation has an identifier but no static + keyword, then it declares a regular operation. + If the operation has one or more + special keywords + used in its declaration (that is, any keyword matching + Special, or + the stringifier keyword), + then it declares a special operation. A single operation can declare + both a regular operation and a special operation; see + section 3.2.4 + for details on special operations. +

+

+ If an operation has no identifier, + then it MUST + be declared to be a special operation using one of the + special keywords. +

+

+ The identifier of a + regular operation + or static operation + MUST NOT be the same as the identifier + of a constant or + attribute + defined on the same interface. + The identifier of a static operation MUST NOT + be “prototype”. +

+
Note
+

+ The identifier can be the same as that of another operation on the + interface, however. This is how operation overloading is specified. +

+
+

+ The identifier of a static operation + also MUST NOT be the same as the identifier + of a regular operation + defined on the same interface. +

+

+ The return type of the operation is given + by the type (matching ReturnType) + that appears before the operation’s optional identifier. + A return type of void indicates that the operation returns no value. + If the return type is an + identifier followed by ?, + then the identifier MUST + identify an interface, dictionary, enumeration, + callback function or typedef. +

+

+ An operation’s arguments (matching ArgumentList) + are given between the parentheses in the declaration. Each individual argument is specified + as a type (matching Type) followed by an identifier + (matching ArgumentName). +

+
Note
+

For expressiveness, the identifier of an operation argument can also be specified + as one of the keywords matching the ArgumentNameKeyword + symbol without needing to escape it.

+
+

+ If the Type of an operation argument is an identifier + followed by ?, + then the identifier MUST identify an interface, + enumeration, callback function + or typedef. + If the operation argument type is an identifier + not followed by ?, then the identifier MUST + identify any one of those definitions or a dictionary. +

+
return-type identifier(type identifier, type identifier, …);
+

+ The identifier of each argument MUST NOT be the same + as the identifier of another argument in the same operation declaration. +

+

+ Each argument can be preceded by a list of + extended attributes (matching + ExtendedAttributeList), + which can control how a value passed as the argument will be handled in + language bindings. +

+
return-type identifier([extended-attributes] type identifier, [extended-attributes] type identifier, …);
+ +
Example
+

+ The following IDL fragment + demonstrates how regular operations + can be declared on an interface: +

+
IDL
interface Dimensions {
+  attribute unsigned long width;
+  attribute unsigned long height;
+};
+
+interface Button {
+
+  // An operation that takes no arguments and returns a boolean.
+  boolean isMouseOver();
+
+  // Overloaded operations.
+  void setDimensions(Dimensions size);
+  void setDimensions(unsigned long width, unsigned long height);
+};
+
+ +

+ An operation is considered to be variadic + if the final argument uses the ... token just + after the argument type. Declaring an operation to be variadic indicates that + the operation can be invoked with any number of arguments after that final argument. + Those extra implied formal arguments are of the same type as the final explicit + argument in the operation declaration. The final argument can also be omitted + when invoking the operation. An argument MUST NOT + be declared with the ... token unless it + is the final argument in the operation’s argument list. +

+
return-type identifier(type... identifier);
+return-type identifier(type identifier, type... identifier);
+

+ Extended attributes + that take an argument list + ([Constructor] and + [NamedConstructor], of those + defined in this specification) and callback functions + are also considered to be variadic + when the ... token is used in their argument lists. +

+ +
Example
+

+ The following IDL fragment defines an interface that has + two variadic operations: +

+
IDL
interface IntegerSet {
+  readonly attribute unsigned long cardinality;
+
+  void union(long... ints);
+  void intersection(long... ints);
+};
+

+ In the ECMAScript binding, variadic operations are implemented by + functions that can accept the subsequent arguments: +

+
ECMAScript
var s = getIntegerSet();  // Obtain an instance of IntegerSet.
+
+s.union();                // Passing no arguments corresponding to 'ints'.
+s.union(1, 4, 7);         // Passing three arguments corresponding to 'ints'.
+

+ A binding for a language that does not support variadic functions + might specify that an explicit array or list of integers be passed + to such an operation. +

+
+ +

+ An argument is considered to be an optional argument + if it is declared with the optional keyword. + The final argument of a variadic operation + is also considered to be an optional argument. Declaring an argument + to be optional indicates that the argument value can be omitted + when the operation is invoked. The final argument in an + operation MUST NOT explicitly be declared to be + optional if the operation is variadic. +

+
return-type identifier(type identifier, optional type identifier);
+ +

+ Optional arguments can also have a default value + specified. If the argument’s identifier is followed by a U+003D EQUALS SIGN ("=") + and a value (matching DefaultValue), + then that gives the optional argument its default value. + The implicitly optional final argument of a variadic + operation MUST NOT have a default value specified. + The default value is the value to be assumed when the operation is called with the + corresponding argument omitted. +

+
return-type identifier(type identifier, optional type identifier = value);
+
Warning
+

+ It is strongly suggested not to use a default value + of true for boolean-typed arguments, + as this can be confusing for authors who might otherwise expect the default + conversion of undefined to be used (i.e., false). +

+
+

+ If the type of an argument is a dictionary type + or a union type that has a + dictionary type as one of its flattened member types, + and that dictionary type and its ancestors have no required members, + and the argument is either the final argument or is followed only by + optional arguments, then + the argument MUST be specified as optional. + Such arguments are always considered to have a + default value of an empty dictionary, + unless otherwise specified. +

+
Note
+

+ This is to encourage API designs that do not require authors to pass an + empty dictionary value when they wish only to use the dictionary’s + default values. +

+

+ Dictionary types cannot have a default value specified explicitly, so the + “unless otherwise specified” clause above can only be invoked for + a union type that has a + dictionary type as one of its flattened member types. +

+
+

+ When a boolean literal token (true or false), + the null token, + an integer token, a + float token or one of + the three special floating point literal values (Infinity, + -Infinity or NaN) is used as the + default value, + it is interpreted in the same way as for a constant. +

+

+ Optional argument default values can also be specified using a string + token, whose value is a string type + determined as follows: +

+
    +
  1. Let S be the sequence of Unicode scalar values matched by the string token with its leading and trailing U+0022 QUOTATION MARK ('"') characters removed.
  2. +
  3. Depending on the type of the argument: +
    +
    DOMString
    +
    an enumeration type
    +
    The value of the string token is the sequence of 16 bit unsigned integer code units (hereafter referred to just as code units) corresponding to the UTF-16 encoding of S.
    +
    ByteString
    +
    The value of the string token is the sequence of 8 bit unsigned integer code units corresponding to the UTF-8 encoding of S.
    +
    USVString
    +
    The value of the string token is S.
    +
    +
  4. +
+

+ If the type of the optional argument + is an enumeration, then its + default value if specified MUST + be one of the enumeration’s values. +

+

+ Optional argument default values can also be specified using the + two token value [], which represents an empty sequence + value. The type of this value is the same as the type of the optional + argument it is being used as the default value of. That type + MUST be a + sequence type or a + nullable type. +

+ +
Example
+

+ The following IDL fragment + defines an interface + with a single operation + that can be invoked with two different argument list lengths: +

+
IDL
interface ColorCreator {
+  object createColor(double v1, double v2, double v3, optional double alpha);
+};
+

+ It is equivalent to an interface + that has two overloaded + operations: +

+
IDL
interface ColorCreator {
+  object createColor(double v1, double v2, double v3);
+  object createColor(double v1, double v2, double v3, double alpha);
+};
+
+ + +

+ If an implementation attempts to invoke an + operation on a + user object (for example, when a callback object + has been supplied to the implementation), and that attempt results in an + exception being thrown, then, unless otherwise specified, that + exception will be propagated to the user code that caused the + implementation to invoke the operation. Similarly, if a value + returned from invoking the operation cannot be converted to + an IDL type, then any exception resulting from this will also + be propagated to the user code that resulted in the implementation + attempting to invoke the operation. +

+ +

+ The following extended attributes + are applicable to operations: + [Exposed], + [NewObject], + [TreatNullAs], + [Unforgeable]. +

+

+ The following extended attributes are applicable to operation arguments: + [Clamp], + [EnforceRange], + [TreatNullAs]. +

+ +
[17]DefaultValueConstValue
 | + string
 | + "[" "]"
[47]OperationReturnType OperationRest
 | + SpecialOperation
[48]SpecialOperationSpecial Specials ReturnType OperationRest
[49]SpecialsSpecial Specials
 | + ε
[50]Special"getter"
 | + "setter"
 | + "deleter"
 | + "legacycaller"
[51]OperationRestOptionalIdentifier "(" ArgumentList ")" ";"
[52]OptionalIdentifieridentifier
 | + ε
[53]ArgumentListArgument Arguments
 | + ε
[54]Arguments"," Argument Arguments
 | + ε
[55]ArgumentExtendedAttributeList OptionalOrRequiredArgument
[56]OptionalOrRequiredArgument"optional" Type ArgumentName Default
 | + Type Ellipsis ArgumentName
[57]ArgumentNameArgumentNameKeyword
 | + identifier
[58]Ellipsis"..."
 | + ε
[71]ArgumentNameKeyword + "attribute"
 | + "callback"
 | + "const"
 | + "deleter"
 | + "dictionary" +
 | + "enum"
 | + "getter"
 | + "implements"
 | + "inherit"
 | + "interface"
 | + "iterable" +
 | + "legacycaller"
 | + "partial"
 | + "required"
 | + "serializer"
 | + "setter"
 | + "static"
 | + "stringifier"
 | + "typedef" +
 | + "unrestricted" +
[89]ReturnTypeType
 | + "void"
+
+ +
+

3.2.4 Special operations

+ +

+ A special operation is a + declaration of a certain kind of special behavior on objects implementing + the interface on which the special operation declarations appear. + Special operations are declared by using one or more + special keywords + in an operation declaration. +

+

+ There are six kinds of special operations. The table below indicates + for a given kind of special operation what special keyword + is used to declare it and what the purpose of the special operation is: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Special operationKeywordPurpose
GettersgetterDefines behavior for when an object is indexed for property retrieval.
SetterssetterDefines behavior for when an object is indexed for property + assignment or creation.
DeletersdeleterDefines behavior for when an object is indexed for property deletion.
Legacy callerslegacycallerDefines behavior for when an object is called as if it were a function.
StringifiersstringifierDefines how an object is converted into a DOMString.
SerializersserializerDefines how an object is converted into a serialized form.
+

+ Not all language bindings support all of the six kinds of special + object behavior. When special operations are declared using + operations with no identifier, then in language bindings that do + not support the particular kind of special operations there simply + will not be such functionality. +

+
Example
+

The following IDL fragment defines an interface with a getter and a setter:

+
IDL
interface Dictionary {
+  readonly attribute unsigned long propertyCount;
+
+  getter double (DOMString propertyName);
+  setter void (DOMString propertyName, double propertyValue);
+};
+

In language bindings that do not support property getters and setters, + objects implementing Dictionary will not + have that special behavior.

+
+ +

+ Defining a special operation with an identifier + is equivalent to separating the special operation out into its own + declaration without an identifier. This approach is allowed to + simplify prose descriptions of an interface’s operations. +

+
Example
+

The following two interfaces are equivalent:

+
IDL
interface Dictionary {
+  readonly attribute unsigned long propertyCount;
+
+  getter double getProperty(DOMString propertyName);
+  setter void setProperty(DOMString propertyName, double propertyValue);
+};
+
IDL
interface Dictionary {
+  readonly attribute unsigned long propertyCount;
+
+  double getProperty(DOMString propertyName);
+  void setProperty(DOMString propertyName, double propertyValue);
+
+  getter double (DOMString propertyName);
+  setter void (DOMString propertyName, double propertyValue);
+};
+
+ +

+ A given special keyword MUST NOT + appear twice on an operation. +

+

+ Getters and setters come in two varieties: ones that + take a DOMString as a property name, + known as + named property getters and + named property setters, + and ones that take an unsigned long + as a property index, known as + indexed property getters and + indexed property setters. + There is only one variety of deleter: + named property deleters. + See section 3.2.4.4 + and section 3.2.4.5 + for details. +

+

+ On a given interface, + there MUST exist at most one + stringifier, at most one serializer, at most one + named property deleter, + and at most one of each variety of getter and setter. + Multiple legacy callers can exist on an interface + to specify overloaded calling behavior. +

+

+ If an interface has a setter of a given variety, + then it MUST also have a getter of that + variety. If it has a named property deleter, + then it MUST also have a + named property getter. +

+

+ Special operations declared using operations MUST NOT + be variadic nor have any + optional arguments. +

+

+ Special operations MUST NOT be declared on + callback interfaces. +

+

+ If an object implements more than one interface + that defines a given special operation, then it is undefined which (if any) + special operation is invoked for that operation. +

+ +
+
3.2.4.1 Legacy callers
+ +

+ When an interface has one or more + legacy callers, it indicates that objects that implement + the interface can be called as if they were functions. As mentioned above, + legacy callers can be specified using an operation + declared with the legacycaller keyword. +

+
legacycaller return-type identifier(arguments…);
+legacycaller return-type (arguments…);
+

+ If multiple legacy callers are specified on an interface, overload resolution + is used to determine which legacy caller is invoked when the object is called + as if it were a function. +

+

+ Legacy callers MUST NOT be defined to return a + promise type. +

+
Warning
+

+ Legacy callers are universally recognised as an undesirable feature. They exist + only so that legacy Web platform features can be specified. Legacy callers + SHOULD NOT be used in specifications unless required to + specify the behavior of legacy APIs, and even then this should be discussed on + the public-script-coord@w3.org + mailing list before proceeding. +

+
+ +
Example
+

+ The following IDL fragment + defines an interface + with a legacy caller. +

+
IDL
interface NumberQuadrupler {
+  // This operation simply returns four times the given number x.
+  legacycaller double compute(double x);
+};
+

+ An ECMAScript implementation supporting this interface would + allow a platform object + that implements NumberQuadrupler + to be called as a function: +

+
ECMAScript
var f = getNumberQuadrupler();  // Obtain an instance of NumberQuadrupler.
+
+f.compute(3);                   // This evaluates to 12.
+f(3);                           // This also evaluates to 12.
+
+
+ +
+
3.2.4.2 Stringifiers
+ +

+ When an interface has a + stringifier, it indicates that objects that implement + the interface have a non-default conversion to a string. As mentioned above, + stringifiers can be specified using an operation + declared with the stringifier keyword. +

+
stringifier DOMString identifier();
+stringifier DOMString ();
+

+ If an operation used to declare a stringifier does not have an + identifier, then prose + accompanying the interface MUST define + the stringification behavior + of the interface. If the operation does have an identifier, + then the object is converted to a string by invoking the + operation to obtain the string. +

+

+ Stringifiers declared with operations MUST + be declared to take zero arguments and return a DOMString. +

+

+ As a shorthand, if the stringifier keyword + is declared using an operation with no identifier, then the + operation’s return type and + argument list can be omitted. +

+
stringifier;
+
Example
+

The following two interfaces are equivalent:

+
IDL
interface A {
+  stringifier DOMString ();
+};
+
IDL
interface A {
+  stringifier;
+};
+
+

+ The stringifier keyword + can also be placed on an attribute. + In this case, the string to convert the object to is the + value of the attribute. The stringifier keyword + MUST NOT be placed on an attribute unless + it is declared to be of type DOMString or USVString. + It also MUST NOT be placed on + a static attribute. +

+
stringifier attribute DOMString identifier;
+ +
[35]Stringifier"stringifier" StringifierRest
[36]StringifierRestReadOnly AttributeRest
 | + ReturnType OperationRest
 | + ";"
+ +
Example
+

+ The following IDL fragment + defines an interface that will stringify to the value of its + name attribute: +

+
IDL
[Constructor]
+interface Student {
+  attribute unsigned long id;
+  stringifier attribute DOMString name;
+};
+

+ In the ECMAScript binding, using a Student + object in a context where a string is expected will result in the + value of the object’s “name” property being + used: +

+
ECMAScript
var s = new Student();
+s.id = 12345678;
+s.name = '周杰倫';
+
+var greeting = 'Hello, ' + s + '!';  // Now greeting == 'Hello, 周杰倫!'.
+

+ The following IDL fragment + defines an interface that has custom stringification behavior that is + not specified in the IDL itself. +

+
IDL
[Constructor]
+interface Student {
+  attribute unsigned long id;
+  attribute DOMString? familyName;
+  attribute DOMString givenName;
+
+  stringifier DOMString ();
+};
+

+ Thus, prose is required to explain the stringification behavior, such + as the following paragraph: +

+
+

+ Objects that implement the Student + interface must stringify as follows. If the value of the + familyName attribute is + null, the stringification of the + object is the value of the givenName + attribute. Otherwise, if the value of the + familyName attribute is not null, + the stringification of the object is the concatenation of the + value of the givenName attribute, + a single space character, and the value of + the familyName attribute. +

+
+

+ An ECMAScript implementation of the IDL would behave as follows: +

+
ECMAScript
var s = new Student();
+s.id = 12345679;
+s.familyName = 'Smithee';
+s.givenName = 'Alan';
+
+var greeting = 'Hi ' + s;  // Now greeting == 'Hi Alan Smithee'.
+
+
+ +
+
3.2.4.3 Serializers
+ +

+ When an interface has a + serializer, it indicates that objects provide + a way for them to be converted into a serialized form. Serializers can be declared + using the serializer keyword: +

+
serializer;
+

+ Prose accompanying an interface that declares a serializer in this + way MUST define the + serialization behavior + of the interface. Serialization behavior is defined as returning + a serialized value of one of the following types: +

+ +

+ How the serialization behavior + is made available on an object in a language binding, and how exactly the abstract + serialized value is converted into + an appropriate concrete value, is language binding specific. +

+
Note
+

In the ECMAScript language binding, + serialization behavior + is exposed as a toJSON method which returns the + serialized value converted + into an ECMAScript value that can be serialized to JSON by the + JSON.stringify function. See section 4.5.8.2 + for details.

+
+

+ Serialization behavior + can also be specified directly in IDL, rather than separately as prose. + This is done by following the serializer keyword with + a U+003D EQUALS SIGN ("=") character and + a serialization pattern, + which can take one of the following six forms: +

+
    +
  • +

    A map with entries corresponding to zero or more attributes from the interface, and optionally + attributes from an inherited interface:

    +
    serializer = { attribute-identifier, attribute-identifier, … };
    +serializer = { inherit, attribute-identifier, attribute-identifier, … };
    +

    Each identifier MUST be the identifier of an attribute declared + on the interface. The identified attributes all MUST have a + serializable type.

    +

    The inherit keyword MUST NOT be used unless + the interface inherits from another that defines a serializer, and the closest such interface + defines its serializer using this serialization pattern + form or the following form (i.e. { attribute }).

    +

    The serialization behavior for this + form of serialization pattern is as follows:

    +
      +
    1. Let map be an empty map.
    2. +
    3. If the inherit keyword was used, then set map to be the result of + the serialization behavior of the + closest inherited interface that declares a serializer.
    4. +
    5. For each attribute identifier i in the serialization pattern, in order: +
        +
      1. Remove any entry in map with key name i.
      2. +
      3. Let V be the value of the attribute with identifier i.
      4. +
      5. Add an entry to map whose key name is i and whose + value is the result of converting + V to a serialized value.
      6. +
      +
    6. +
    7. Return map.
    8. +
    +
  • +
  • +

    A map with entries corresponding to all attributes from the interface that have + a serializable type, and optionally + attributes from an inherited interface:

    +
    serializer = { attribute };
    +serializer = { inherit, attribute };
    +

    The inherit keyword MUST NOT be used unless + the interface inherits from another that defines a serializer, and the closest such interface + defines its serializer using this serialization pattern + form or the previous form.

    +

    The serialization behavior for this + form of serialization pattern is as follows:

    +
      +
    1. Let map be an empty map.
    2. +
    3. If the inherit keyword was used, then set map to be the result of + the serialization behavior of the + closest inherited interface that declares a serializer.
    4. +
    5. For each identifier i of an attribute on the interface whose type is + a serializable type, in the order they appear + on the interface: +
        +
      1. Remove any entry in map with key name i.
      2. +
      3. Let V be the value of the attribute with identifier i.
      4. +
      5. Add an entry to map whose key name is i and whose + value is the result of converting + V to a serialized value.
      6. +
      +
    6. +
    7. Return map.
    8. +
    +
  • +
  • +

    A map with entries corresponding to the named properties:

    +
    serializer = { getter };
    +

    This form MUST NOT be used unless the interface or one it + inherits from supports named properties and the return type of the named property getter + is a serializable type.

    +

    The serialization behavior for this + form of serialization pattern is as follows:

    +
      +
    1. Let map be an empty map.
    2. +
    3. For each supported property name n on the object, in order: +
        +
      1. Let V be the value of the named property with name n.
      2. +
      3. Add an entry to map whose key name is n and whose + value is the result of converting + V to a serialized value.
      4. +
      +
    4. +
    5. Return map.
    6. +
    +
  • +
  • +

    A list of values of zero or more attributes on the interface:

    +
    serializer = [ attribute-identifier, attribute-identifier, … ];
    +

    Each identifier MUST be the identifier of an attribute declared + on the interface. The identified attributes all MUST have a + serializable type.

    +

    The serialization behavior for this + form of serialization pattern is as follows:

    +
      +
    1. Let list be an empty list.
    2. +
    3. For each attribute identifier i in the serialization pattern: +
        +
      1. Let V be the value of the attribute with identifier i.
      2. +
      3. Append to list the value that is the result of + converting + V to a serialized value.
      4. +
      +
    4. +
    5. Return list.
    6. +
    +
  • +
  • +

    A list with entries corresponding to the indexed properties:

    +
    serializer = [ getter ];
    +

    This form MUST NOT be used unless the interface or one it + inherits from supports indexed properties and the return type of the indexed property getter + is a serializable type.

    +

    The serialization behavior for this + form of serialization pattern is as follows:

    +
      +
    1. Let list be an empty list.
    2. +
    3. Let i be 0.
    4. +
    5. While i is less than or equal to the greatest supported property index on the object: +
        +
      1. Let V be the value of the indexed property with index i + if i is a supported property index, or null otherwise.
      2. +
      3. Append to list the value that is the result of + converting + V to a serialized value.
      4. +
      5. Set i to i + 1.
      6. +
      +
    6. +
    7. Return list.
    8. +
    +
  • +
  • +

    A single attribute:

    +
    serializer = attribute-identifier;
    +

    The identifier MUST be the identifier of an attribute declared + on the interface, and this attribute MUST have a + serializable type.

    +

    The serialization behavior for this + form of serialization pattern is as follows:

    +
      +
    1. Let V be the value of the attribute with the specified identifier.
    2. +
    3. Return the result of converting + V to a serialized value.
    4. +
    +
  • +
+ +
Note
+

+ Entries are added to maps in a particular order so that in the ECMAScript language binding + it is defined what order properties are added to objects. This is because this order + can influence the serialization that JSON.stringify can produce. +

+
+ +

The list of serializable types and how they are + converted to serialized values is as follows:

+
+
long long
+
converted by choosing the closest equivalent double value + (as when converting a long long to an ECMAScript Number value)
+
unsigned long long
+
converted by choosing the closest equivalent double value + (as when converting an unsigned long long to an ECMAScript Number value)
+
any other integer type
+
float
+
converted by choosing the equivalent double value
+
double
+
boolean
+
DOMString
+
the same value of the respective type
+
an enumeration type
+
the equivalent DOMString value
+
a USVString
+
the DOMString produced by + encoding the given sequence of Unicode scalar values in + UTF-16
+
a ByteString
+
the equivalent DOMString value where each code unit has the same value as the corresponding byte value
+
a nullable serializable type
+
converted to null if that is its value, + otherwise converted as per its inner type
+
a union type where + all of its member types + are serializable types
+
converted as per its specific type
+
a sequence type that + has a serializable type as its element type
+
converted to a list where each element is the result of converting its + corresponding sequence element to a serialized value
+
a dictionary where + all of its members have + serializable types
+
converted to a map consisting of an entry for each dictionary member + that is present, where the entry’s key is the identifier of the dictionary + member and its value is the result of converting the dictionary member’s + value to a serialized value
+
an interface type that has a + serializer
+
converted by invoking the object’s serializer
+
+ +

+ Serializers can also be specified using an operation + with the serializer keyword: +

+
serializer type identifier();
+

+ Serializers declared with operations MUST + be declared to take zero arguments and return a serializable type. +

+

+ The serialization behavior + of the interface with a serializer declared with an operation is the result of + converting + the value returned from invoking the operation to a serialized value. +

+ +
[30]Serializer"serializer" SerializerRest
[31]SerializerRestOperationRest
 | + "=" SerializationPattern ";"
 | + ";"
[32]SerializationPattern"{" SerializationPatternMap "}"
 | + "[" SerializationPatternList "]"
 | + identifier
[33]SerializationPatternMap"getter"
 | + "inherit" Identifiers
 | + identifier Identifiers
 | + ε
[34]SerializationPatternList"getter"
 | + identifier Identifiers
 | + ε
[91]Identifiers"," identifier Identifiers
 | + ε
+ +
Example
+

+ The following IDL fragment defines + an interface Transaction that has a + serializer defined in prose: +

+
IDL
interface Transaction {
+  readonly attribute Account from;
+  readonly attribute Account to;
+  readonly attribute double amount;
+  readonly attribute DOMString description;
+  readonly attribute unsigned long number;
+
+  serializer;
+};
+
+interface Account {
+  DOMString name;
+  unsigned long number;
+};
+

+ The serializer could be defined as follows: +

+
+

+ The serialization behavior + of the Transaction interface is to run the following + algorithm, where O is the object that implements Transaction: +

+
    +
  1. Let map be an empty map.
  2. +
  3. Add an entry to map whose key is “from” and whose value is + the serialized value of + the number attribute on the Account + object referenced by the from attribute on O.
  4. +
  5. Add an entry to map whose key is “to” and whose value is + the serialized value of + the number attribute on the Account + object referenced by the to attribute on O.
  6. +
  7. For both of the attributes amount and description, + add an entry to map whose key is the + identifier of the attribute + and whose value is the serialized value + of the value of the attribute on O.
  8. +
  9. Return map.
  10. +
+
+

+ If it was acceptable for Account objects to be serializable + on their own, then serialization patterns + could be used to avoid having to define the serialization behavior + in prose: +

+
IDL
interface Transaction {
+  readonly attribute Account from;
+  readonly attribute Account to;
+  readonly attribute double amount;
+  readonly attribute DOMString description;
+  readonly attribute unsigned long number;
+
+  serializer = { from, to, amount, description };
+};
+
+interface Account {
+  DOMString name;
+  unsigned long number;
+
+  serializer = number;
+};
+

+ In the ECMAScript language binding, there would exist a toJSON method on + Transaction objects: +

+
ECMAScript
// Get an instance of Transaction.
+var txn = getTransaction();
+
+// Evaluates to an object like this:
+// {
+//   from: 1234
+//   to: 5678
+//   amount: 110.75
+//   description: "dinner"
+// }
+txn.toJSON();
+
+// Evaluates to a string like this:
+// '{"from":1234,"to":5678,"amount":110.75,"description":"dinner"}'
+JSON.stringify(txn);
+
+
+ +
+
3.2.4.4 Indexed properties
+ +

+ An interface that defines + an indexed property getter + is said to support indexed properties. +

+

+ If an interface supports indexed properties, + then the interface definition MUST be accompanied by + a description of what indices the object can be indexed with at + any given time. These indices are called the supported property indices. +

+

+ Indexed property getters MUST + be declared to take a single unsigned long argument. + Indexed property setters MUST + be declared to take two arguments, where the first is an unsigned long. +

+
getter type identifier(unsigned long identifier);
+setter type identifier(unsigned long identifier, type identifier);
+
+getter type (unsigned long identifier);
+setter type (unsigned long identifier, type identifier);
+

+ The following requirements apply to the definitions of indexed property getters and setters: +

+
    +
  • + If an indexed property getter was specified using an operation + with an identifier, + then the value returned when indexing the object with a given supported property index + is the value that would be returned by invoking the operation, passing + the index as its only argument. If the operation used to declare the indexed property getter + did not have an identifier, then the interface definition must be accompanied + by a description of how to determine the value of an indexed property + for a given index. +
  • +
  • + If an indexed property setter was specified using an operation + with an identifier, + then the behavior that occurs when indexing the object for property assignment with a given supported property index and value + is the same as if the operation is invoked, passing + the index as the first argument and the value as the second argument. If the operation used to declare the indexed property setter + did not have an identifier, then the interface definition must be accompanied + by a description of how to set the value of an existing indexed property + and how to set the value of a new indexed property + for a given property index and value. +
  • +
+ +
Note
+

+ Note that if an indexed property getter or + setter + is specified using an operation with an identifier, + then indexing an object with an integer that is not a supported property index + does not necessarily elicit the same behavior as invoking the operation with that index. The actual behavior in this + case is language binding specific. +

+

+ In the ECMAScript language binding, a regular property lookup is done. For example, take the following IDL: +

+
IDL
interface A {
+  getter DOMString toWord(unsigned long index);
+};
+

+ Assume that an object implementing A has supported property indices + in the range 0 ≤ index < 2. Also assume that toWord is defined to return + its argument converted into an English word. The behavior when invoking the + operation with an out of range index + is different from indexing the object directly: +

+
ECMAScript
var a = getA();
+
+a.toWord(0);  // Evaluates to "zero".
+a[0];         // Also evaluates to "zero".
+
+a.toWord(5);  // Evaluates to "five".
+a[5];         // Evaluates to undefined, since there is no property "5".
+
+ +
Example
+

+ The following IDL fragment defines an interface + OrderedMap which allows + retrieving and setting values by name or by index number: +

+
IDL
interface OrderedMap {
+  readonly attribute unsigned long size;
+
+  getter any getByIndex(unsigned long index);
+  setter void setByIndex(unsigned long index, any value);
+
+  getter any get(DOMString name);
+  setter void set(DOMString name, any value);
+};
+

+ Since all of the special operations are declared using + operations with identifiers, the only additional prose + that is necessary is that which describes what keys those sets + have. Assuming that the get() operation is + defined to return null if an + attempt is made to look up a non-existing entry in the + OrderedMap, then the following + two sentences would suffice: +

+
+

+ An object map implementing OrderedMap + supports indexed properties with indices in the range + 0 ≤ index < map.size. +

+

+ Such objects also support a named property for every name that, + if passed to get(), would return a non-null value. +

+
+

+ As described in section 4.7 , + an ECMAScript implementation would create + properties on a platform object implementing + OrderedMap that correspond to + entries in both the named and indexed property sets. + These properties can then be used to interact + with the object in the same way as invoking the object’s + methods, as demonstrated below: +

+
ECMAScript
// Assume map is a platform object implementing the OrderedMap interface.
+var map = getOrderedMap();
+var x, y;
+
+x = map[0];       // If map.size > 0, then this is equivalent to:
+                  //
+                  //   x = map.getByIndex(0)
+                  //
+                  // since a property named "0" will have been placed on map.
+                  // Otherwise, x will be set to undefined, since there will be
+                  // no property named "0" on map.
+
+map[1] = false;   // This will do the equivalent of:
+                  //
+                  //   map.setByIndex(1, false)
+
+y = map.apple;    // If there exists a named property named "apple", then this
+                  // will be equivalent to:
+                  //
+                  //   y = map.get('apple')
+                  //
+                  // since a property named "apple" will have been placed on
+                  // map.  Otherwise, y will be set to undefined, since there
+                  // will be no property named "apple" on map.
+
+map.berry = 123;  // This will do the equivalent of:
+                  //
+                  //   map.set('berry', 123)
+
+delete map.cake;  // If a named property named "cake" exists, then the "cake"
+                  // property will be deleted, and then the equivalent to the
+                  // following will be performed:
+                  //
+                  //   map.remove("cake")
+
+
+ +
+
3.2.4.5 Named properties
+ +

+ An interface that defines + a named property getter + is said to support named properties. +

+

+ If an interface supports named properties, + then the interface definition MUST be accompanied by + a description of the ordered set of names that can be used to index the object + at any given time. These names are called the + supported property names. +

+

+ Named property getters and deleters MUST + be declared to take a single DOMString argument. + Named property setters MUST + be declared to take two arguments, where the first is a DOMString. +

+
getter type identifier(DOMString identifier);
+setter type identifier(DOMString identifier, type identifier);
+deleter type identifier(DOMString identifier);
+
+getter type (DOMString identifier);
+setter type (DOMString identifier, type identifier);
+deleter type (DOMString identifier);
+

+ The following requirements apply to the definitions of named property getters, setters and deleters: +

+
    +
  • + If a named property getter was specified using an operation + with an identifier, + then the value returned when indexing the object with a given supported property name + is the value that would be returned by invoking the operation, passing + the name as its only argument. If the operation used to declare the named property getter + did not have an identifier, then the interface definition must be accompanied + by a description of how to determine the value of a named property + for a given property name. +
  • +
  • + If a named property setter was specified using an operation + with an identifier, + then the behavior that occurs when indexing the object for property assignment with a given supported property name and value + is the same as if the operation is invoked, passing + the name as the first argument and the value as the second argument. If the operation used to declare the named property setter + did not have an identifier, then the interface definition must be accompanied + by a description of how to set the value of an existing named property + and how to set the value of a new named property + for a given property name and value. +
  • +
  • + If a named property deleter was specified using an operation + with an identifier, + then the behavior that occurs when indexing the object for property deletion with a given supported property name + is the same as if the operation is invoked, passing + the name as the only argument. If the operation used to declare the named property deleter + did not have an identifier, then the interface definition must be accompanied + by a description of how to delete an existing named property + for a given property name. +
  • +
+ +
Note
+

+ As with indexed properties, + if a named property getter, + setter or + deleter + is specified using an operation with an identifier, + then indexing an object with a name that is not a supported property name + does not necessarily elicit the same behavior as invoking the operation with that name; the behavior + is language binding specific. +

+
+
+
+ +
+

3.2.5 Static attributes and operations

+ +

+ Static attributes and + static operations are ones that + are not associated with a particular instance of the + interface + on which they are declared, and are instead associated with the interface + itself. Static attributes and operations are declared by using the + static keyword in their declarations. +

+

+ It is language binding specific whether it is possible to invoke + a static operation or get or set a static attribute through a reference + to an instance of the interface. +

+

+ Static attributes and operations MUST NOT be + declared on callback interfaces. +

+ +
[37]StaticMember"static" StaticMemberRest
[38]StaticMemberRestReadOnly AttributeRest
 | + ReturnType OperationRest
+ +
Example
+

+ The following IDL fragment defines an interface + Circle that has a static + operation declared on it: +

+
IDL
interface Point { /* ... */ };
+
+interface Circle {
+  attribute double cx;
+  attribute double cy;
+  attribute double radius;
+
+  static readonly attribute long triangulationCount;
+  static Point triangulate(Circle c1, Circle c2, Circle c3);
+};
+

+ In the ECMAScript language binding, the Function object for + triangulate and the accessor property for triangulationCount + will exist on the interface object + for Circle: +

+
ECMAScript
var circles = getCircles();           // an Array of Circle objects
+
+typeof Circle.triangulate;            // Evaluates to "function"
+typeof Circle.triangulationCount;     // Evaluates to "number"
+Circle.prototype.triangulate;         // Evaluates to undefined
+Circle.prototype.triangulationCount;  // Also evaluates to undefined
+circles[0].triangulate;               // As does this
+circles[0].triangulationCount;        // And this
+
+// Call the static operation
+var triangulationPoint = Circle.triangulate(circles[0], circles[1], circles[2]);
+
+// Find out how many triangulations we have done
+window.alert(Circle.triangulationCount);
+ +
+
+ +
+

3.2.6 Overloading

+ +

+ If a regular operation + or static operation + defined on an interface + has an identifier + that is the same as the identifier of another operation on that + interface of the same kind (regular or static), then the operation is said to be + overloaded. When the identifier + of an overloaded operation is used to invoke one of the + operations on an object that implements the interface, the + number and types of the arguments passed to the operation + determine which of the overloaded operations is actually + invoked. If an interface has multiple + legacy callers defined on it, + then those legacy callers are also said to be overloaded. + In the ECMAScript language binding, constructors + can be overloaded too. There are some restrictions on the arguments + that overloaded operations, legacy callers and constructors can be + specified to take, and in order to describe these restrictions, + the notion of an effective overload set is used. +

+

+ Operations and legacy callers + MUST NOT be overloaded across interface + and partial interface definitions. +

+
Note
+

+ For example, the overloads for both f and g + are disallowed: +

+
IDL
interface A {
+  void f();
+};
+
+partial interface A {
+  void f(double x);
+  void g();
+};
+
+partial interface A {
+  void g(DOMString x);
+};
+

Note that the [Constructor] and + [NamedConstructor] + extended attributes are disallowed from appearing + on partial interface definitions, + so there is no need to also disallow overloading for constructors.

+
+

+ An effective overload set + represents the allowable invocations for a particular + operation, + constructor (specified with [Constructor] + or [NamedConstructor]), + legacy caller or + callback function. + The algorithm to compute an effective overload set + operates on one of the following six types of IDL constructs, and listed with them below are + the inputs to the algorithm needed to compute the set. +

+
+
For regular operations
+
For static operations
+
+ +
+
For legacy callers
+
+ +
+
For constructors
+
+ +
+
For named constructors
+
+ +
+
For callback functions
+
+ +
+
+

+ An effective overload set is used, among other things, to determine whether there are ambiguities in the + overloaded operations, constructors and callers specified on an interface. +

+

+ The elements of an effective overload set are tuples of the form + <callable, type list, optionality list>. If the effective overload + set is for regular operations, static operations or legacy callers, then callable is an operation; + if it is for constructors or named constructors, then callable is an + extended attribute; and if it is for callback functions, then callable + is the callback function itself. In all cases, type list is a list + of IDL types, and optionality list is a list of three possible optionality values – + “required”, “optional” or “variadic” – indicating whether + the argument at a given index was declared as being optional + or corresponds to a variadic argument. + Each tuple represents an allowable invocation of the operation, + constructor, legacy caller or callback function with an argument value list of the given types. + Due to the use of optional arguments + and variadic operations + and constructors, there may be multiple entries in an effective overload set identifying + the same operation or constructor. +

+

+ The algorithm below describes how to compute an effective overload set. + The following input variables are used, if they are required: +

+
    +
  • the identifier of the operation or named constructor is A
  • +
  • the argument count is N
  • +
  • the interface is I
  • +
  • the callback function is C
  • +
+

+ Whenever an argument of an extended + attribute is mentioned, it is referring to an argument of the + extended attribute’s named argument list. +

+
    +
  1. Initialize S to ∅.
  2. +
  3. Let F be a set with elements as follows, according to the kind of effective overload set: +
    +
    For regular operations
    +
    + The elements of F are the regular operations with + identifier A defined on interface I. +
    +
    For static operations
    +
    + The elements of F are the static operations with + identifier A defined on interface I. +
    +
    For constructors
    +
    + The elements of F are the + [Constructor] + extended attributes on interface I. +
    +
    For named constructors
    +
    + The elements of F are the + [NamedConstructor] + extended attributes on interface I whose + named argument lists’ + identifiers are A. +
    +
    For legacy callers
    +
    + The elements of F are the legacy callers + defined on interface I. +
    +
    For callback functions
    +
    + The single element of F is the callback function itself, C. +
    +
    +
  4. + +
  5. + Let maxarg be the maximum number of arguments the operations, constructor extended attributes or callback functions in F are declared to take. + For variadic operations and constructor extended attributes, + the argument on which the ellipsis appears counts as a single argument. +
    Note
    +

    So void f(long x, long... y); is considered to be declared to take two arguments.

    +
    +
  6. +
  7. Let m be the maximum of maxarg and N.
  8. +
  9. For each operation, extended attribute or callback function X in F: +
      +
    1. Let n be the number of arguments X is declared to take.
    2. +
    3. Let t0..n−1 be a list of types, where ti + is the type of X’s argument at index i.
    4. +
    5. Let o0..n−1 be a list of optionality values, where oi + is “variadic” if X’s argument at index i is a final, variadic argument, + “optional” if the argument is optional, + and “required” otherwise.
    6. +
    7. Add to S the tuple <X, t0..n−1, o0..n−1>.
    8. +
    9. If X is declared to be variadic, then: +
        +
      1. Add to S the tuple <X, t0..n−2, o0..n−2>. +
        Note
        +

        This leaves off the final, variadic argument.

        +
        +
      2. +
      3. For every integer i, such that n ≤ i ≤ m−1: +
          +
        1. Let u0..i be a list of types, where uj = tj (for j < n) and uj = tn−1 (for j ≥ n).
        2. +
        3. Let p0..i be a list of optionality values, where pj = oj (for j < n) and pj = “variadic” (for j ≥ n).
        4. +
        5. Add to S the tuple <X, u0..i, p0..i>.
        6. +
        +
      4. +
      +
    10. +
    11. Initialize i to n−1.
    12. +
    13. While i ≥ 0: +
        +
      1. If argument i of X is not optional, then break this loop.
      2. +
      3. Otherwise, add to S the tuple <X, t0..i−1, o0..i−1>.
      4. +
      5. Set i to i−1.
      6. +
      +
    14. +
    15. If n > 0 and all arguments of X are optional, then add to S the tuple <X, (), ()> (where “()” represents the empty list).
    16. +
    +
  10. +
  11. + The effective overload set is S. +
  12. +
+
Example
+

+ For the following interface: +

+
IDL
interface A {
+  /* f1 */ void f(DOMString a);
+  /* f2 */ void f(Node a, DOMString b, double... c);
+  /* f3 */ void f();
+  /* f4 */ void f(Event a, DOMString b, optional DOMString c, double... d);
+};
+

+ assuming Node and Event + are two other interfaces of which no object can implement both, + the effective overload set + for regular operations with + identifier f and argument count 4 is: +

+
+ { <f1, (DOMString), (required)>,
+ <f2, (Node, DOMString), (required, required)>,
+ <f2, (Node, DOMString, double), (required, required, variadic)>,
+ <f2, (Node, DOMString, double, double), (required, required, variadic, variadic)>,
+ <f3, (), ()>,
+ <f4, (Event, DOMString), (required, required)>,
+ <f4, (Event, DOMString, DOMString), (required, required, optional)>,
+ <f4, (Event, DOMString, DOMString, double), (required, required, optional, variadic)> } +
+
+ +

+ Two types are distinguishable if + at most one of the two includes a nullable type + or is a dictionary type, + and at least one of the following three conditions is true: +

+
    +
  1. +

    + The two types (taking their inner types + if they are nullable types) appear + in the following table and there is a “●” mark in the corresponding entry + or there is a letter in the corresponding entry and the designated additional + requirement below the table is satisfied:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    booleannumeric typesstring typesinterfaceobjectcallback
    function
    dictionarysequence<T>exception typesbuffer source types
    boolean
    numeric types
    string types
    interface(a)(b)(b)
    object
    callback function
    dictionary
    sequence<T>
    exception types
    buffer source types
    +
      +
    1. The two identified interfaces are + not the same, it is not possible for a single platform object + to implement both interfaces, + and it is not the case that both are callback interfaces.
    2. +
    3. The interface type is not a callback interface.
    4. +
    +
  2. +
  3. + One type is a union type or nullable union type, + the other is neither a union type nor a nullable union type, and each + member type of the first is distinguishable + with the second. +
  4. +
  5. + Both types are either a union type or nullable union type, and each member type of the one + is distinguishable with each member type of the other. +
  6. +
+
Note
+

Promise types do not appear in the above table, and as a consequence + are not distinguishable with any other type.

+
+

+ If there is more than one entry in an effective overload set + that has a given type list length, then for those entries there + MUST be an index i such + that for each pair of entries the types at index i are + distinguishable. + The lowest such index is termed the distinguishing argument index + for the entries of the effective overload set with the given type list length. +

+
Example
+

+ Consider the effective overload set shown in the previous example. + There are multiple entries in the set with type list lengths 2, 3 and 4. + For each of these type list lengths, the distinguishing + argument index is 0, since Node and + Event are distinguishable. +

+

+ The following use of overloading however is invalid: +

+
IDL
interface B {
+  void f(DOMString x);
+  void f(double x);
+};
+

+ since DOMString and + double are not distinguishable. +

+
+

+ In addition, for each index j, where j is less than the + distinguishing argument index + for a given type list length, the types at index j in + all of the entries’ type lists MUST be the same + and the optionality values at index j in all of the entries’ optionality lists MUST + be the same. +

+
Example
+

The following is invalid:

+
IDL
interface B {
+  /* f1 */ void f(DOMString w);
+  /* f2 */ void f(long w, double x, Node y, Node z);
+  /* f3 */ void f(double w, double x, DOMString y, Node z);
+};
+

+ For argument count 4, the effective overload set is: +

+
+ { <f1, (DOMString), (required)>,
+ <f2, (long, double, Node, Node), (required, required, required, required)>,
+ <f3, (double, double, DOMString, Node), (required, required, required, required)> } +
+

+ Looking at entries with type list length 4, the + distinguishing argument index + is 2, since Node and + DOMString are distinguishable. + However, since the arguments in these two overloads at index 0 are different, + the overloading is invalid. +

+
+ +
+ +
+

3.2.7 Iterable declarations

+ +

+ An interface can be declared to be + iterable by using an iterable declaration + (matching Iterable) in the body of the interface. +

+
iterable<value-type>;
+iterable<key-type, value-type>;
+

+ Objects implementing an interface that is declared to be iterable + support being iterated over to obtain a sequence of values. +

+
Note
+

In the ECMAScript language binding, an interface that is iterable + will have “entries”, “keys”, “values” and @@iterator + properties on its interface prototype object.

+
+

+ If a single type parameter is given, then the interface has a + value iterator and provides + values of the specified type. + If two type parameters are given, then the interface has a + pair iterator and provides + value pairs, where the first value is a key and the second is the + value associated with the key. +

+

+ Prose accompanying an interface with a value iterator + MUST define what the + list of values to iterate over is, + unless the interface also + supports indexed properties, + in which case the values of the indexed properties are implicitly + iterated over. Prose accompanying an interface with a + pair iterator + MUST define what the list of + value pairs to iterate over + is. +

+
Note
+

Interfaces that support indexed properties + need to have a “length” attribute for the iterator to work correctly.

+
+

+ The prose is responsible for defining that the list of values + or value pairs to iterate over is snapshotted at the time + iteration begins, if that is desired. To handle lists that + can change during iteration, the behavior of an + iterator is defined to loop through the items in order, starting + at index 0, and advancing this index on each iteration. Iteration ends when + the index has gone past the end of the list. +

+
Note
+

This is how array iterator objects work. + For interfaces that support indexed properties, + the iterator objects returned by “entries”, “keys”, “values” and @@iterator are + actual array iterator objects.

+
+

+ Interfaces with iterable declarations MUST NOT + have any interface members + named “entries”, “keys” or “values”, + or have any inherited + or consequential + interfaces that have interface members with these names. +

+ +
Example
+

Consider the following interface SessionManager, which allows access to + a number of Session objects:

+
IDL
interface SessionManager {
+  Session getSessionForUser(DOMString username);
+  readonly attribute unsigned long sessionCount;
+
+  iterable<Session>;
+};
+
+interface Session {
+  readonly attribute DOMString username;
+  // ...
+};
+

+ The behavior of the iterator could be defined like so: +

+
+

+ The values to iterate over + are a snapshot of the open Session objects + on the SessionManager sorted by username. +

+
+

+ In the ECMAScript language binding, the interface prototype object + for the SessionManager interface + has a values method that is a function, which, when invoked, + returns an iterator object that itself has a next method that returns the + next value to be iterated over. It has keys and entries + methods that iterate over the indexes of the list of session objects + and [index, session object] pairs, respectively. It also has + a @@iterator method that allows a SessionManager + to be used in a for..of loop: +

+
ECMAScript
// Get an instance of SessionManager.
+// Assume that it has sessions for two users, "anna" and "brian".
+var sm = getSessionManager();
+
+typeof SessionManager.prototype.values;            // Evaluates to "function"
+var it = sm.values();                              // values() returns an iterator object
+String(it);                                        // Evaluates to "[object SessionManager Iterator]"
+typeof it.next;                                    // Evaluates to "function"
+
+// This loop will alert "anna" and then "brian".
+for (;;) {
+  let result = it.next();
+  if (result.done) {
+    break;
+  }
+  let session = result.value;
+  window.alert(session.username);
+}
+
+// This loop will also alert "anna" and then "brian".
+for (let session of sm) {
+  window.alert(session.username);
+}
+

+ If the SessionManager interface supported indexed properties + and had an attribute named “length” + that reflected the number of session objects, we could avoid defining the + values to iterate over. +

+
+ +

+ An interface MUST NOT have more than one + iterable declaration. + The inherited + and consequential + interfaces of an interface with an + iterable declaration + MUST NOT also have an + iterable declaration. +

+

+ The following extended attributes are applicable to iterable declarations: + [Exposed]. +

+ + +
[59]Iterable"iterable" "<" Type OptionalType ">" ";"
[60]OptionalType"," Type
 | + ε
+
+ + + + +
+ +
+

3.3 Dictionaries

+ +

+ A dictionary is a definition (matching + Dictionary) + used to define an associative array data type with a fixed, ordered set of key–value pairs, + termed dictionary members, + where keys are strings and values are of a particular type specified in the definition. +

+
dictionary identifier {
+  dictionary-members…
+};
+

+ Dictionaries are always passed by value. In language bindings where a dictionary is represented by an object of some kind, passing a + dictionary to a platform object will not result in a reference to the dictionary being kept by that object. + Similarly, any dictionary returned from a platform object will be a copy and modifications made to it will not be visible to the platform object. +

+

+ A dictionary can be defined to inherit from another dictionary. + If the identifier of the dictionary is followed by a colon and an identifier, + then that identifier identifies the inherited dictionary. The identifier + MUST identify a dictionary. +

+

+ A dictionary MUST NOT be declared such that + its inheritance hierarchy has a cycle. That is, a dictionary + A cannot inherit from itself, nor can it inherit from another + dictionary B that inherits from A, and so on. +

+
dictionary Base {
+  dictionary-members…
+};
+
+dictionary Derived : Base {
+  dictionary-members…
+};
+

+ The inherited dictionaries of + a given dictionary D is the set of all dictionaries that D + inherits from, directly or indirectly. If D does not inherit + from another dictionary, then the set is empty. Otherwise, the set + includes the dictionary E that D inherits + from and all of E’s inherited dictionaries. +

+

+ A dictionary value of type D can have key–value pairs corresponding + to the dictionary members defined on D and on any of D’s + inherited dictionaries. + On a given dictionary value, the presence of each dictionary member + is optional, unless that member is specified as required. + When specified in the dictionary value, a dictionary member is said to be + present, otherwise it is not present. + Dictionary members can also optionally have a default value, which is + the value to use for the dictionary member when passing a value to a + platform object that does + not have a specified value. Dictionary members with default values are + always considered to be present. +

+
Warning
+

+ As with operation argument default values, + it is strongly suggested not to use true as the + default value for + boolean-typed + dictionary members, + as this can be confusing for authors who might otherwise expect the default + conversion of undefined to be used (i.e., false). +

+
+

+ Each dictionary member (matching + DictionaryMember) is specified + as a type (matching Type) followed by an + identifier + (given by an identifier token following + the type). The identifier is the key name of the key–value pair. + If the Type + is an identifier + followed by ?, then the identifier + MUST identify an + interface, enumeration, + callback function or typedef. + If the dictionary member type is an identifier + not followed by ?, then the identifier MUST + identify any one of those definitions or a dictionary. +

+
dictionary identifier {
+  type identifier;
+};
+

+ If the identifier is followed by a U+003D EQUALS SIGN ("=") + and a value (matching DefaultValue), + then that gives the dictionary member its default value. +

+
dictionary identifier {
+  type identifier = value;
+};
+

+ When a boolean literal token (true or false), + the null token, + an integer token, a + float token, + one of the three special floating point literal values (Infinity, + -Infinity or NaN), + a string token or + the two token sequence [] is used as the + default value, + it is interpreted in the same way as for an operation’s + optional argument default value. +

+

+ If the type of the dictionary member + is an enumeration, then its + default value if specified MUST + be one of the enumeration’s values. +

+

+ If the type of the dictionary member is preceded by the + required keyword, the member is considered a + required dictionary member + and must be present on the dictionary. A + required dictionary + member MUST NOT have a default value. +

+
dictionary identifier {
+  required type identifier;
+};
+

+ The type of a dictionary member MUST NOT include + the dictionary it appears on. A type includes a dictionary D + if at least one of the following is true: +

+
    +
  • the type is D
  • +
  • the type is a dictionary that inherits from D
  • +
  • the type is a nullable type + whose inner type includes D
  • +
  • the type is a sequence type + whose element type includes D
  • +
  • the type is a union type, + one of whose member types + includes D
  • +
  • the type is a dictionary, one of whose members or inherited members has + a type that includes D
  • +
+

+ As with interfaces, the IDL for dictionaries can be split into multiple parts + by using partial dictionary definitions + (matching "partial" Dictionary). + The identifier of a partial + dictionary definition MUST be the same as the + identifier of a dictionary definition. All of the members that appear on each + of the partial dictionary definitions are considered to be members of + the dictionary itself. +

+
dictionary SomeDictionary {
+  dictionary-members…
+};
+
+partial dictionary SomeDictionary {
+  dictionary-members…
+};
+
Note
+

As with partial interface definitions, partial dictionary definitions are intended for use as a specification + editorial aid, allowing the definition of a dictionary to be separated + over more than one section of the document, and sometimes multiple documents.

+
+

+ The order of the dictionary members + on a given dictionary is such that inherited dictionary members are ordered + before non-inherited members, and the dictionary members on the one + dictionary definition (including any partial dictionary definitions) are + ordered lexicographically by the Unicode codepoints that comprise their + identifiers. +

+
Note
+

For example, with the following definitions:

+
IDL
dictionary B : A {
+  long b;
+  long a;
+};
+
+dictionary A {
+  long c;
+  long g;
+};
+
+dictionary C : B {
+  long e;
+  long f;
+};
+
+partial dictionary A {
+  long h;
+  long d;
+};
+

+ the order of the dictionary members + of a dictionary value of type C is + c, d, g, h, a, b, e, f. +

+

+ Dictionaries are required to have their members ordered because + in some language bindings the behavior observed when passing + a dictionary value to a platform object depends on the order + the dictionary members are fetched. For example, consider the + following additional interface: +

+
IDL
interface Something {
+  void f(A a);
+};
+

+ and this ECMAScript code: +

+
ECMAScript
var something = getSomething();  // Get an instance of Something.
+var x = 0;
+
+var dict = { };
+Object.defineProperty(dict, "d", { get: function() { return ++x; } });
+Object.defineProperty(dict, "c", { get: function() { return ++x; } });
+
+something.f(dict);
+

+ The order that the dictionary members are fetched in determines + what values they will be taken to have. Since the order for + A is defined to be c then d, + the value for c will be 1 and the value for d will be 2. +

+
+

+ The identifier of a dictionary member MUST NOT be + the same as that of another dictionary member defined on the dictionary or + on that dictionary’s inherited dictionaries. +

+

+ Dictionaries MUST NOT be used as the type of an + attribute or + constant. +

+

+ The following extended attributes are applicable to dictionaries: + [Constructor], + [Exposed]. +

+

+ The following extended attributes are applicable to dictionary members: + [Clamp], + [EnforceRange]. +

+
[6]Partial"partial" PartialDefinition
[7]PartialDefinitionPartialInterface
 | + PartialDictionary
[11]Dictionary"dictionary" identifier Inheritance "{" DictionaryMembers "}" ";"
[12]DictionaryMembersExtendedAttributeList DictionaryMember DictionaryMembers
 | + ε
[13]DictionaryMemberRequired Type identifier Default ";"
[15]PartialDictionary"dictionary" identifier "{" DictionaryMembers "}" ";"
[16]Default"=" DefaultValue
 | + ε
[17]DefaultValueConstValue
 | + string
 | + "[" "]"
[18]Inheritance":" identifier
 | + ε
+
Example
+

+ One use of dictionary types is to allow a number of optional arguments to + an operation without being + constrained as to the order they are specified at the call site. For example, + consider the following IDL fragment: +

+
IDL
[Constructor]
+interface Point {
+  attribute double x;
+  attribute double y;
+};
+
+dictionary PaintOptions {
+  DOMString? fillPattern = "black";
+  DOMString? strokePattern = null;
+  Point position;
+};
+
+interface GraphicsContext {
+  void drawRectangle(double width, double height, optional PaintOptions options);
+};
+

+ In an ECMAScript implementation of the IDL, an Object + can be passed in for the optional PaintOptions dictionary: +

+
ECMAScript
// Get an instance of GraphicsContext.
+var ctx = getGraphicsContext();
+
+// Draw a rectangle.
+ctx.drawRectangle(300, 200, { fillPattern: "red", position: new Point(10, 10) });
+

+ Both fillPattern and strokePattern are given default values, + so if they are omitted, the definition of drawRectangle can assume that they + have the given default values and not include explicit wording to handle + their non-presence. +

+
+
+ +
+

3.4 Exceptions

+ +

+ An exception is a type of object that + represents an error and which can be thrown or treated as a first + class value by implementations. Web IDL does not allow exceptions + to be defined, but instead has a number of pre-defined exceptions + that specifications can reference and throw in their definition of + operations, attributes, and so on. Exceptions have an + error name, + a DOMString, + which is the type of error the exception represents, and a + message, which is an optional, + user agent-defined value that provides human readable details of the error. +

+

+ There are two kinds of exceptions available to be thrown from specifications. + The first is a simple exception, which + is identified by one of the following names: +

+
    +
  • Error
  • +
  • EvalError
  • +
  • RangeError
  • +
  • ReferenceError
  • +
  • TypeError
  • +
  • URIError
  • +
+

+ These correspond to all of the ECMAScript error objects ( + [ECMA-262] + , section 19.5) (apart from + SyntaxError, which is deliberately omitted as + it is for use only by the ECMAScript parser). + The meaning of + each simple exception matches + its corresponding Error object in the + ECMAScript specification. +

+

+ The second kind of exception is a DOMException, + which is an exception that encapsulates a name and an optional integer code, + for compatibility with historically defined exceptions in the DOM. +

+

+ For simple exceptions, + the error name is the name + of the exception. + For a DOMException, + the error name MUST + be one of the names listed in the error names table + below. The table also indicates the DOMException's integer code + for that error name, if it has one. +

+

+ There are two types that can be used to refer to + exception objects: Error, which encompasses all exceptions, + and DOMException which includes just DOMException objects. + This allows for example an operation + to be declared to have a DOMException + return type or an attribute + to be of type Error. +

+

+ An exception can be created by providing its + error name. + Exceptions can also be thrown, by providing the + same details required to create one. +

+

+ The resulting behavior from creating and throwing an exception is language binding-specific. +

+
Note
+

+ See section 4.12 + for details on what creating and throwing an exception + entails in the ECMAScript language binding. +

+
+
Example
+

+ Here are some examples of wording to use to create and throw exceptions. + To throw a new simple exception named + TypeError: +

+
+

Throw a TypeError.

+
+

+ To throw a new DOMException with + error name + IndexSizeError: +

+
+

Throw an IndexSizeError.

+
+

+ To create a new DOMException with + error name + SyntaxError: +

+
+

Let object be a newly created SyntaxError.

+
+
+ +
+

3.4.1 Error names

+ +

+ The error names table below lists all the allowed error names + for DOMExceptions, a description, + and legacy code values. +

+ +
Note
+

If an error name is not listed here, please file a bug as indicated at the top of this specification and it will be addressed shortly. Thanks!

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameDescriptionLegacy code name and value
"IndexSizeError"The index is not in the allowed range.INDEX_SIZE_ERR (1)
"HierarchyRequestError"The operation would yield an incorrect node tree.HIERARCHY_REQUEST_ERR (3)
"WrongDocumentError"The object is in the wrong document.WRONG_DOCUMENT_ERR (4)
"InvalidCharacterError"The string contains invalid characters.INVALID_CHARACTER_ERR (5)
"NoModificationAllowedError"The object can not be modified.NO_MODIFICATION_ALLOWED_ERR (7)
"NotFoundError"The object can not be found here.NOT_FOUND_ERR (8)
"NotSupportedError"The operation is not supported.NOT_SUPPORTED_ERR (9)
"InUseAttributeError"The attribute is in use.INUSE_ATTRIBUTE_ERR (10)
"InvalidStateError"The object is in an invalid state.INVALID_STATE_ERR (11)
"SyntaxError"The string did not match the expected pattern.SYNTAX_ERR (12)
"InvalidModificationError"The object can not be modified in this way.INVALID_MODIFICATION_ERR (13)
"NamespaceError"The operation is not allowed by Namespaces in XML. [XMLNS]NAMESPACE_ERR (14)
"InvalidAccessError"The object does not support the operation or argument.INVALID_ACCESS_ERR (15)
"SecurityError"The operation is insecure.SECURITY_ERR (18)
"NetworkError"A network error occurred.NETWORK_ERR (19)
"AbortError"The operation was aborted.ABORT_ERR (20)
"URLMismatchError"The given URL does not match another URL.URL_MISMATCH_ERR (21)
"QuotaExceededError"The quota has been exceeded.QUOTA_EXCEEDED_ERR (22)
"TimeoutError"The operation timed out.TIMEOUT_ERR (23)
"InvalidNodeTypeError"The supplied node is incorrect or has an incorrect ancestor for this operation.INVALID_NODE_TYPE_ERR (24)
"DataCloneError"The object can not be cloned.DATA_CLONE_ERR (25)
"EncodingError"The encoding operation (either encoding or decoding) failed.
"NotReadableError"The I/O read operation failed.
"UnknownError"The operation failed for an unknown transient reason (e.g. out of memory).
"ConstraintError"A mutation operation in a transaction failed because a constraint was not satisfied.
"DataError"Provided data is inadequate.
"TransactionInactiveError"A request was placed against a transaction which is currently not active, or which is finished.
"ReadOnlyError"The mutating operation was attempted in a "readonly" transaction.
"VersionError"An attempt was made to open a database using a lower version than the existing version.
"OperationError"The operation failed for an operation-specific reason.
+
+
+ +
+

3.5 Enumerations

+ +

+ An enumeration is a definition (matching + Enum) used to declare a type + whose valid values are a set of predefined strings. Enumerations + can be used to restrict the possible + DOMString values that can be assigned to an + attribute or passed to an + operation. +

+
enum identifier { enumeration-values… };
+

+ The enumeration values are specified + as a comma-separated list of string literals. + The list of enumeration values + MUST NOT include duplicates. +

+
Warning
+

+ It is strongly suggested that enumeration values be all lowercase, + and that multiple words be separated using dashes or not be + separated at all, unless there is a specific reason to use another + value naming scheme. For example, an enumeration value that + indicates an object should be created could be named + "createobject" or "create-object". + Consider related uses of enumeration values when deciding whether + to dash-separate or not separate enumeration value words so that + similar APIs are consistent. +

+
+

+ The behavior when a string value that is not a valid enumeration value + is used when assigning to an attribute, + or passed as an operation argument, + whose type is the enumeration, is language binding specific. +

+
Note
+

+ In the ECMAScript binding, assignment of an invalid string value to an + attribute is ignored, while + passing such a value as an operation argument + results in an exception being thrown. +

+
+

+ No extended attributes + defined in this specification are applicable to enumerations. +

+ +
[19]Enum"enum" identifier "{" EnumValueList "}" ";"
[20]EnumValueListstring EnumValueListComma
[21]EnumValueListComma"," EnumValueListString
 | + ε
[22]EnumValueListStringstring EnumValueListComma
 | + ε
+ +
Example
+

+ The following IDL fragment + defines an enumeration + that is used as the type of an attribute + and an operation argument: +

+
IDL
enum MealType { "rice", "noodles", "other" };
+
+interface Meal {
+  attribute MealType type;
+  attribute double size;     // in grams
+
+  void initialize(MealType type, double size);
+};
+

+ An ECMAScript implementation would restrict the strings that can be + assigned to the type property or passed to the initialize function + to those identified in the enumeration. +

+
ECMAScript
var meal = getMeal();                // Get an instance of Meal.
+
+meal.initialize("rice", 200);        // Operation invoked as normal.
+
+try {
+  meal.initialize("sandwich", 100);  // Throws a TypeError.
+} catch (e) {
+}
+
+meal.type = "noodles";               // Attribute assigned as normal.
+meal.type = "dumplings";             // Attribute assignment ignored.
+meal.type == "noodles";              // Evaluates to true.
+
+
+ +
+

3.6 Callback functions

+ + +

+ A callback function is a definition (matching + "callback" CallbackRest) used to declare a function type. +

+
callback identifier = return-type (arguments…);
+
Note
+

See also the similarly named callback interfaces.

+
+

+ The identifier on the + left of the equals sign gives the name of the callback function + and the return type and argument list (matching ReturnType + and ArgumentList) on the right side of the equals + sign give the signature of the callback function type. +

+

+ Callback functions MUST NOT + be used as the type of a constant. +

+

+ The following extended attribute is applicable to callback functions: + [TreatNonObjectAsNull]. +

+ +
[3]CallbackOrInterface"callback" CallbackRestOrInterface
 | + Interface
[4]CallbackRestOrInterfaceCallbackRest
 | + Interface
[23]CallbackRestidentifier "=" ReturnType "(" ArgumentList ")" ";"
+ +
Example
+

+ The following IDL fragment defines + a callback function used for an API that + invokes a user-defined function when an operation is complete. +

+
IDL
callback AsyncOperationCallback = void (DOMString status);
+
+interface AsyncOperations {
+  void performOperation(AsyncOperationCallback whenFinished);
+};
+

+ In the ECMAScript language binding, a Function object is + passed as the operation argument. +

+
ECMAScript
var ops = getAsyncOperations();  // Get an instance of AsyncOperations.
+
+ops.performOperation(function(status) {
+  window.alert("Operation finished, status is " + status + ".");
+});
+
+
+ +
+

3.7 Typedefs

+ +

+ A typedef is a definition (matching + Typedef) + used to declare a new name for a type. This new name is not exposed + by language bindings; it is purely used as a shorthand for referencing + the type in the IDL. +

+
typedef type identifier;
+

+ The type being given a new name is specified after the typedef + keyword (matching Type), and the + identifier token following the + type gives the name. +

+

+ The Type MUST NOT + identify the same or another typedef. +

+

+ No extended attributes + defined in this specification are applicable to typedefs. +

+ +
[24]Typedef"typedef" Type identifier ";"
+ +
Example
+

+ The following IDL fragment + demonstrates the use of typedefs + to allow the use of a short + identifier instead of a long + sequence type. +

+
IDL
interface Point {
+  attribute double x;
+  attribute double y;
+};
+
+typedef sequence<Point> Points;
+
+interface Widget {
+  boolean pointWithinBounds(Point p);
+  boolean allPointsWithinBounds(Points ps);
+};
+
+
+ +
+

3.8 Implements statements

+ +

+ An implements statement is a definition + (matching ImplementsStatement) + used to declare that all objects implementing an interface A + (identified by the first identifier) + MUST additionally implement interface B + (identified by the second identifier), including all other interfaces that + B inherits from. +

+
identifier-A implements identifier-B;
+

+ Transitively, if objects implementing B + are declared with an implements statement + to additionally implement interface C, then all objects implementing + A do additionally implement interface C. +

+

+ The two identifiers MUST + identify two different interfaces. +

+

+ The interface identified on the left-hand side of an implements statement + MUST NOT inherit + from the interface identified on the right-hand side, and vice versa. Both identified + interfaces also MUST NOT be + callback interfaces. +

+

+ If each implements statement is + considered to be an edge in a directed graph, from a node representing the interface + on the left-hand side of the statement to a node representing the interface on the + right-hand side, then this graph MUST NOT have any cycles. +

+

+ Interfaces that a given object implements are partitioned into those that are considered + supplemental interfaces and those that are not. + An interface A is considered to be a + supplemental interface of an object + O if: +

+
    +
  • O implements a different interface B, and the IDL states that + B implements A; or
  • +
  • O implements a different supplemental interface + C, and C inherits from A.
  • +
+
Note
+

+ Specification authors are discouraged from writing implements statements + where the interface on the left-hand side + is a supplemental interface. + For example, if author 1 writes: +

+
IDL
interface Window { ... };
+interface SomeFunctionality { ... };
+Window implements SomeFunctionality;
+

+ and author 2 later writes: +

+
IDL
interface Gizmo { ... };
+interface MoreFunctionality { ... };
+SomeFunctionality implements MoreFunctionality;
+Gizmo implements SomeFunctionality;
+

+ then it might be the case that author 2 is unaware of exactly which + interfaces already are used on the left-hand side of an + implements SomeFunctionality statement, and so has + required more objects implement MoreFunctionality + than he or she expected. +

+

+ Better in this case would be for author 2 to write: +

+
IDL
interface Gizmo { ... };
+interface MoreFunctionality { ... };
+Gizmo implements SomeFunctionality;
+Gizmo implements MoreFunctionality;
+
+

+ The consequential interfaces of an interface + A are: +

+
    +
  • each interface B where the IDL states A implements B;
  • +
  • each interface that a consequential interface of A inherits from; and
  • +
  • each interface D where the IDL states that C implements D, + where C is a consequential interface of A.
  • +
+

+ For a given interface, there MUST NOT + be any member defined on any of its consequential interfaces + whose identifier is the same as any other member defined on any + of those consequential interfaces or on the original interface itself. +

+
Note
+

For example, that precludes the following:

+
IDL
interface A { attribute long x; };
+interface B { attribute long x; };
+A implements B;  // B::x would clash with A::x
+
+interface C { attribute long y; };
+interface D { attribute long y; };
+interface E : D { };
+C implements E;  // D::y would clash with C::y
+
+interface F { };
+interface H { attribute long z; };
+interface I { attribute long z; };
+F implements H;
+F implements I;  // H::z and I::z would clash when mixed in to F
+
+

+ No extended attributes + defined in this specification are applicable to + implements statements. +

+ +
[25]ImplementsStatementidentifier "implements" identifier ";"
+ +
Example
+

+ The following IDL fragment + defines two interfaces, stating + that one interface is always implemented on objects implementing the other. +

+
IDL
interface Entry {
+  readonly attribute unsigned short entryType;
+  // ...
+};
+
+interface Observable {
+  void addEventListener(DOMString type,
+                        EventListener listener,
+                        boolean useCapture);
+  // ...
+};
+
+Entry implements Observable;
+

+ An ECMAScript implementation would thus have an “addEventListener” + property in the prototype chain of every Entry: +

+
ECMAScript
var e = getEntry();          // Obtain an instance of Entry.
+typeof e.addEventListener;  // Evaluates to "function".
+ +

+ Note that it is not the case that all Observable + objects implement Entry. +

+
+
+ +
+

3.9 Objects implementing interfaces

+ +

+ In a given implementation of a set of IDL fragments, + an object can be described as being a platform object, a + user object, or neither. There are two kinds of + object that are considered to be platform objects: +

+ +

+ In a browser, for example, + the browser-implemented DOM objects (implementing interfaces such as Node and + Document) that provide access to a web page’s contents + to ECMAScript running in the page would be platform objects. These objects might be exotic objects, + implemented in a language like C++, or they might be native ECMAScript objects. Regardless, + an implementation of a given set of IDL fragments needs to be able to recognize all platform objects + that are created by the implementation. This might be done by having some internal state that records whether + a given object is indeed a platform object for that implementation, or perhaps by observing + that the object is implemented by a given internal C++ class. How exactly platform objects + are recognized by a given implementation of a set of IDL fragments is implementation-specific. +

+

+ All other objects in the system would not be treated as platform objects. For example, assume that + a web page opened in a browser loads an ECMAScript library that implements DOM Core. This library + would be considered to be a different implementation from the browser provided implementation. + The objects created by the ECMAScript library that implement the Node interface + will not be treated as platform objects that implement Node by the browser implementation. +

+

+ User objects are those that authors would create, implementing + callback interfaces that the Web APIs use to be able to invoke author-defined + operations or to send and receive values to the author’s program through + manipulating the object’s attributes. In a web page, an ECMAScript object + that implements the EventListener interface, which is + used to register a callback that the DOM Events implementation invokes, would be considered + to be a user object. +

+

+ Note that user objects can only implement callback interfaces + and platform objects can only implement non-callback interfaces. +

+ +
+ + + +
+

3.10 Types

+ +

+ This section lists the types supported by Web IDL, the set of values + corresponding to each type, and how constants + of that type are represented. +

+

+ The following types are known as integer types: + byte, + octet, + short, + unsigned short, + long, + unsigned long, + long long and + unsigned long long. +

+

+ The following types are known as numeric types: + the integer types, + float, + unrestricted float, + double and + unrestricted double. +

+

+ The primitive types are + boolean and the numeric types. +

+

+ The string types are + DOMString, all enumeration types, + ByteString and USVString. +

+

+ The exception types are + Error and DOMException. +

+

+ The typed array types are + Int8Array, + Int16Array, + Int32Array, + Uint8Array, + Uint16Array, + Uint32Array, + Uint8ClampedArray, + Float32Array and + Float64Array. +

+

+ The buffer source types + are ArrayBuffer, + DataView, + and the typed array types. +

+

+ The object type, + all interface types + and the exception types + are known as object types. +

+

+ Every type has a type name, which + is a string, not necessarily unique, that identifies the type. + Each sub-section below defines what the type name is for each + type. +

+

+ When conversions are made from language binding specific types to + IDL types in order to invoke an operation + or assign a value to an attribute, + all conversions necessary will be performed before the + specified functionality of the operation or attribute assignment + is carried out. If the conversion cannot + be performed, then the operation will not run or + the attribute will not be updated. In some language bindings, + type conversions could result in an exception being thrown. + In such cases, these exceptions will be propagated to the + code that made the attempt to invoke the operation or + assign to the attribute. +

+ +
[73]TypeSingleType
 | + UnionType Null
[74]SingleTypeNonAnyType
 | + "any"
[75]UnionType"(" UnionMemberType "or" UnionMemberType UnionMemberTypes ")"
[76]UnionMemberTypeNonAnyType
 | + UnionType Null
[77]UnionMemberTypes"or" UnionMemberType UnionMemberTypes
 | + ε
[78]NonAnyTypePrimitiveType Null
 | + PromiseType Null
 | + "ByteString" Null
 | + "DOMString" Null
 | + "USVString" Null
 | + identifier Null
 | + "sequence" "<" Type ">" Null
 | + "object" Null
 | + "Error" Null
 | + "DOMException" Null
 | + BufferRelatedType Null
[80]ConstTypePrimitiveType Null
 | + identifier Null
[81]PrimitiveTypeUnsignedIntegerType
 | + UnrestrictedFloatType
 | + "boolean"
 | + "byte"
 | + "octet"
[82]UnrestrictedFloatType"unrestricted" FloatType
 | + FloatType
[83]FloatType"float"
 | + "double"
[84]UnsignedIntegerType"unsigned" IntegerType
 | + IntegerType
[85]IntegerType"short"
 | + "long" OptionalLong
[86]OptionalLong"long"
 | + ε
[87]PromiseType"Promise" "<" ReturnType ">"
[88]Null"?"
 | + ε
+ +
+

3.10.1 any

+ +

+ The any type is the union of all other possible + non-union types. + Its type name is “Any”. +

+

+ The any type is like + a discriminated union type, in that each of its values has a + specific non-any type + associated with it. For example, one value of the + any type is the + unsigned long + 150, while another is the long 150. + These are distinct values. +

+

+ The particular type of an any + value is known as its specific type. + (Values of union types also have + specific types.) +

+
+ +
+

3.10.2 boolean

+ +

+ The boolean type has two values: + true and false. +

+

+ boolean constant values in IDL are + represented with the true and + false tokens. +

+

+ The type name of the + boolean type is “Boolean”. +

+
+ +
+

3.10.3 byte

+ +

+ The byte type is a signed integer + type that has values in the range [−128, 127]. +

+

+ byte constant values in IDL are + represented with integer + tokens. +

+

+ The type name of the + byte type is “Byte”. +

+
+ +
+

3.10.4 octet

+ +

+ The octet type is an unsigned integer + type that has values in the range [0, 255]. +

+

+ octet constant values in IDL are + represented with integer + tokens. +

+

+ The type name of the + octet type is “Octet”. +

+
+ +
+

3.10.5 short

+ +

+ The short type is a signed integer + type that has values in the range [−32768, 32767]. +

+

+ short constant values in IDL are + represented with integer + tokens. +

+

+ The type name of the + short type is “Short”. +

+
+ +
+

3.10.6 unsigned short

+ +

+ The unsigned short type is an unsigned integer + type that has values in the range [0, 65535]. +

+

+ unsigned short constant values in IDL are + represented with integer + tokens. +

+

+ The type name of the + unsigned short type is “UnsignedShort”. +

+
+ +
+

3.10.7 long

+ +

+ The long type is a signed integer + type that has values in the range [−2147483648, 2147483647]. +

+

+ long constant values in IDL are + represented with integer + tokens. +

+

+ The type name of the + long type is “Long”. +

+
+ +
+

3.10.8 unsigned long

+ +

+ The unsigned long type is an unsigned integer + type that has values in the range [0, 4294967295]. +

+

+ unsigned long constant values in IDL are + represented with integer + tokens. +

+

+ The type name of the + unsigned long type is “UnsignedLong”. +

+
+ +
+

3.10.9 long long

+ +

+ The long long type is a signed integer + type that has values in the range [−9223372036854775808, 9223372036854775807]. +

+

+ long long constant values in IDL are + represented with integer + tokens. +

+

+ The type name of the + long long type is “LongLong”. +

+
+ +
+

3.10.10 unsigned long long

+ +

+ The unsigned long long type is an unsigned integer + type that has values in the range [0, 18446744073709551615]. +

+

+ unsigned long long constant values in IDL are + represented with integer + tokens. +

+

+ The type name of the + unsigned long long type is “UnsignedLongLong”. +

+
+ +
+

3.10.11 float

+ +

+ The float type is a floating point numeric + type that corresponds to the set of finite single-precision 32 bit + IEEE 754 floating point numbers. [IEEE-754] +

+

+ float constant values in IDL are + represented with float + tokens. +

+

+ The type name of the + float type is “Float”. +

+
Warning
+

+ Unless there are specific reasons to use a 32 bit floating point type, + specifications SHOULD use + double rather than float, + since the set of values that a double can + represent more closely matches an ECMAScript Number. +

+
+
+ +
+

3.10.12 unrestricted float

+ +

+ The unrestricted float type is a floating point numeric + type that corresponds to the set of all possible single-precision 32 bit + IEEE 754 floating point numbers, finite and non-finite. [IEEE-754] +

+

+ unrestricted float constant values in IDL are + represented with float + tokens. +

+

+ The type name of the + unrestricted float type is “UnrestrictedFloat”. +

+
+ +
+

3.10.13 double

+ +

+ The double type is a floating point numeric + type that corresponds to the set of finite double-precision 64 bit + IEEE 754 floating point numbers. [IEEE-754] +

+

+ double constant values in IDL are + represented with float + tokens. +

+

+ The type name of the + double type is “Double”. +

+
+ +
+

3.10.14 unrestricted double

+ +

+ The unrestricted double type is a floating point numeric + type that corresponds to the set of all possible double-precision 64 bit + IEEE 754 floating point numbers, finite and non-finite. [IEEE-754] +

+

+ unrestricted double constant values in IDL are + represented with float + tokens. +

+

+ The type name of the + unrestricted double type is “UnrestrictedDouble”. +

+
+ +
+

3.10.15 DOMString

+ +

+ The DOMString type + corresponds to the set of all possible sequences of code units. + Such sequences are commonly interpreted as UTF-16 encoded strings [RFC2781] + although this is not required. + While DOMString is defined to be an OMG IDL boxed + sequence<unsigned short> + valuetype in DOM Level 3 Core + ([DOM3CORE], section 1.2.1), + this document defines DOMString to be an intrinsic type so as to avoid + special casing that sequence type in various situations where a + string is required. +

+
Note
+

+ Note also that null + is not a value of type DOMString. + To allow null, a + nullable DOMString, + written as DOMString? in IDL, needs to be used. +

+
+

+ Nothing in this specification requires a DOMString + value to be a valid UTF-16 string. For example, a DOMString + value might include unmatched surrogate pair characters. However, authors + of specifications using Web IDL might want to obtain a sequence of + Unicode scalar values given a particular sequence of + code units. + The following algorithm defines a way to + convert a DOMString to a sequence of Unicode scalar values: +

+
    +
  1. Let S be the DOMString value.
  2. +
  3. Let n be the length of S.
  4. +
  5. Initialize i to 0.
  6. +
  7. Initialize U to be an empty sequence of Unicode characters.
  8. +
  9. While i < n: +
      +
    1. Let c be the code unit in S at index i.
    2. +
    3. Depending on the value of c: +
      +
      c < 0xD800 or c > 0xDFFF
      +
      Append to U the Unicode character with code point c.
      + +
      0xDC00 ≤ c ≤ 0xDFFF
      +
      Append to U a U+FFFD REPLACEMENT CHARACTER.
      + +
      0xD800 ≤ c ≤ 0xDBFF
      +
      +
        +
      1. If i = n−1, then append to U a U+FFFD REPLACEMENT CHARACTER.
      2. +
      3. Otherwise, i < n−1: +
          +
        1. Let d be the code unit in S at index + i+1.
        2. +
        3. If 0xDC00 ≤ d ≤ 0xDFFF, then: +
            +
          1. Let a be c & 0x3FF.
          2. +
          3. Let b be d & 0x3FF.
          4. +
          5. Append to U the Unicode character with + code point $2^{16}+2^{10}a+b$.
          6. +
          7. Set i to i+1.
          8. +
          +
        4. +
        5. Otherwise, d < 0xDC00 or d > 0xDFFF. + Append to U a U+FFFD REPLACEMENT CHARACTER.
        6. +
        +
      4. +
      +
      +
      +
    4. +
    5. Set i to i+1.
    6. +
    +
  10. +
  11. Return U.
  12. +
+

+ There is no way to represent a constant DOMString + value in IDL, although DOMString dictionary member + and operation optional argument default values + can be specified using a string literal. +

+

+ The type name of the + DOMString type is “String”. +

+
+ +
+

3.10.16 ByteString

+ +

+ The ByteString type + corresponds to the set of all possible sequences of bytes. + Such sequences might be interpreted as UTF-8 encoded strings [RFC3629] + or strings in some other 8-bit-per-code-unit encoding, although this is not required. +

+

+ There is no way to represent a constant ByteString + value in IDL. +

+

+ The type name of the + ByteString type is “ByteString”. +

+
Warning
+

+ Specifications SHOULD only use + ByteString for interfacing with protocols + that use bytes and strings interchangeably, such as HTTP. In general, + strings SHOULD be represented with + DOMString values, even if it is expected + that values of the string will always be in ASCII or some + 8 bit character encoding. Sequences or Typed Arrays + with octet or byte + elements SHOULD be used for holding + 8 bit data rather than ByteString. + [TYPEDARRAYS] +

+
+
+ +
+

3.10.17 USVString

+ +

+ The USVString type + corresponds to the set of all possible sequences of + Unicode scalar values, + which are all of the Unicode code points apart from the + surrogate code points. +

+

+ There is no way to represent a constant USVString + value in IDL, although USVString dictionary member + and operation optional argument default values + can be specified using a string literal. +

+

+ The type name of the + USVString type is “USVString”. +

+
Warning
+

+ Specifications SHOULD only use + USVString for APIs that perform + text processing and need a string of Unicode + scalar values to operate on. Most APIs that use strings + should instead be using DOMString, + which does not make any interpretations of the code units + in the string. When in doubt, use DOMString. +

+
+
+ +
+

3.10.18 object

+ +

+ The object type corresponds to the set of + all possible non-null object references. +

+

+ There is no way to represent a constant object + value in IDL. +

+

+ To denote a type that includes all possible object references plus the + null value, use the nullable type + object?. +

+

+ The type name of the + object type is “Object”. +

+
+ +
+

3.10.19 Interface types

+ +

+ An identifier that + identifies an interface is used to refer to + a type that corresponds to the set of all possible non-null references to objects that + implement that interface. +

+

+ For non-callback interfaces, an IDL value of the interface type is represented just + by an object reference. For callback interfaces, an IDL value of the interface type + is represented by a tuple of an object reference and a callback context. + The callback context is a language + binding specific value, and is used to store information about the execution context at + the time the language binding specific object reference is converted to an IDL value. +

+
Note
+

For ECMAScript objects, the callback context is used + to hold a reference to the + incumbent script + [HTML] at the time the Object value + is converted to an IDL callback interface type value. See + section 4.2.20.

+
+

+ There is no way to represent a constant object reference value for + a particular interface type in IDL. +

+

+ To denote a type that includes all possible references to objects implementing + the given interface plus the null value, + use a nullable type. +

+

+ The type name of an interface type + is the identifier of the interface. +

+
+ +
+

3.10.20 Dictionary types

+ +

+ An identifier that + identifies a dictionary is used to refer to + a type that corresponds to the set of all dictionaries that adhere to + the dictionary definition. +

+

+ There is no way to represent a constant dictionary value in IDL. +

+

+ The type name of a dictionary type + is the identifier of the dictionary. +

+
+ +
+

3.10.21 Enumeration types

+ +

+ An identifier that + identifies an enumeration is used to + refer to a type whose values are the set of strings (sequences of + code units, as with + DOMString) that are the + enumeration’s values. +

+

+ Like DOMString, there is no way to represent a constant enumeration + value in IDL, although enumeration-typed dictionary member + default values can be specified using a + string literal. +

+

+ The type name of an enumeration type + is the identifier of the enumeration. +

+
+ +
+

3.10.22 Callback function types

+ +

+ An identifier that identifies + a callback function is used to refer to + a type whose values are references to objects that are functions with the given signature. +

+

+ An IDL value of the callback function type is represented by a tuple of an object + reference and a callback context. +

+
Note
+

As with callback interface types, the callback context is used + to hold a reference to the + incumbent script + [HTML] at the time an ECMAScript Object value + is converted to an IDL callback function type value. See + section 4.2.23.

+
+

+ There is no way to represent a constant callback function + value in IDL. +

+

+ The type name of a callback function type + is the identifier of the callback function. +

+
+ +
+

3.10.23 Nullable types — T?

+ +

+ A nullable type is an IDL type constructed + from an existing type (called the inner type), + which just allows the additional value null + to be a member of its set of values. Nullable types + are represented in IDL by placing a U+003F QUESTION MARK ("?") + character after an existing type. The inner type MUST NOT + be any, + another nullable type, or a union type + that itself includes a nullable type + or has a dictionary type as one of its + flattened member types. +

+
Note
+

Although dictionary types can in general be nullable, they cannot when used + as the type of an operation argument or a dictionary member.

+
+

+ Nullable type constant values in IDL are represented in the same way that + constant values of their inner type + would be represented, or with the null token. +

+

+ The type name of a nullable type + is the concatenation of the type name of the inner type T and + the string “OrNull”. +

+
Example
+

+ For example, a type that allows the values true, + false and null + is written as boolean?: +

+
IDL
interface MyConstants {
+  const boolean? ARE_WE_THERE_YET = false;
+};
+

+ The following interface has two + attributes: one whose value can + be a DOMString or the null + value, and another whose value can be a reference to a Node + object or the null value: +

+
IDL
interface Node {
+  readonly attribute DOMString? namespaceURI;
+  readonly attribute Node? parentNode;
+  // ...
+};
+
+
+ +
+

3.10.24 Sequences — sequence<T>

+ +

+ The sequence<T> + type is a parameterized type whose values are (possibly zero-length) sequences of + values of type T. +

+

+ Sequences are always passed by value. In + language bindings where a sequence is represented by an object of + some kind, passing a sequence to a platform object + will not result in a reference to the sequence being kept by that object. + Similarly, any sequence returned from a platform object + will be a copy and modifications made to it will not be visible to the platform object. +

+

+ There is no way to represent a constant sequence value in IDL. +

+

+ Sequences MUST NOT be used as the + type of an attribute or + constant. +

+
Note
+

+ This restriction exists so that it is clear to specification writers + and API users that sequences + are copied rather than having references + to them passed around. Instead of a writable attribute of a sequence + type, it is suggested that a pair of operations to get and set the + sequence is used. +

+
+

+ The type name of a sequence type + is the concatenation of the type name for T and + the string “Sequence”. +

+
+ +
+

3.10.25 Promise types — Promise<T>

+ +

+ A promise type is a parameterized type + whose values are references to objects, each of which “is used as a placeholder + for the eventual results of a deferred (and possibly asynchronous) computation” [ECMA-262]. + See section 25.4 + of the ECMAScript specification for details on the semantics of promise objects. +

+

+ There is no way to represent a promise value in IDL. +

+

+ The type name of a promise type + is the concatenation of the type name for T and + the string “Promise”. +

+
+ +
+

3.10.26 Union types

+ +

+ A union type is a type whose set of values + is the union of those in two or more other types. Union types (matching + UnionType) + are written as a series of types separated by the or keyword + with a set of surrounding parentheses. + The types which comprise the union type are known as the + union’s member types. +

+
Note
+

+ For example, you might write (Node or DOMString) + or (double or sequence<double>). When applying a + ? suffix to a + union type + as a whole, it is placed after the closing parenthesis, + as in (Node or DOMString)?. +

+

+ Note that the member types + of a union type do not descend into nested union types. So for + (double or (sequence<long> or Event) or (Node or DOMString)?) the member types + are double, (sequence<long> or Event) and + (Node or DOMString)?. +

+
+

+ Like the any type, values of + union types have a specific type, + which is the particular member type + that matches the value. +

+

+ The flattened member types + of a union type is a set of types + determined as follows: +

+
    +
  1. Let T be the union type.
  2. +
  3. Initialize S to ∅.
  4. +
  5. For each member type U of T: +
      +
    1. If U is a nullable type, then + set U to be the inner type of U.
    2. +
    3. If U is a union type, then + add to S the flattened member types + of U.
    4. +
    5. Otherwise, U is not a union type. + Add U to S.
    6. +
    +
  6. +
  7. Return S.
  8. +
+
Note
+

+ For example, the flattened member types + of the union type + (Node or (sequence<long> or Event) or (XMLHttpRequest or DOMString)? or sequence<(sequence<double> or NodeList)>) + are the six types Node, sequence<long>, Event, + XMLHttpRequest, DOMString and + sequence<(sequence<double> or NodeList)>. +

+
+

+ The number of nullable member types + of a union type is an integer + determined as follows: +

+
    +
  1. Let T be the union type.
  2. +
  3. Initialize n to 0.
  4. +
  5. For each member type U of T: +
      +
    1. If U is a nullable type, then: +
        +
      1. Set n to n + 1.
      2. +
      3. Set U to be the inner type of U.
      4. +
      +
    2. +
    3. If U is a union type, then: +
        +
      1. Let m be the number + of nullable member types of U.
      2. +
      3. Set n to n + m.
      4. +
    4. +
    +
  6. +
  7. Return n.
  8. +
+

+ The any type MUST NOT + be used as a union member type. +

+

+ The number of nullable member types + of a union type MUST + be 0 or 1, and if it is 1 then the union type MUST also not have + a dictionary type in its + flattened member types. +

+

+ A type includes a nullable type if: +

+ +

+ Each pair of flattened member types + in a union type, T and U, + MUST be distinguishable. +

+

+ Union type constant values + in IDL are represented in the same way that constant values of their + member types would be + represented. +

+

+ The type name of a union + type is formed by taking the type names of each member type, in order, + and joining them with the string “Or”. +

+
[75]UnionType"(" UnionMemberType "or" UnionMemberType UnionMemberTypes ")"
[76]UnionMemberTypeNonAnyType
 | + UnionType Null
[77]UnionMemberTypes"or" UnionMemberType UnionMemberTypes
 | + ε
[78]NonAnyTypePrimitiveType Null
 | + PromiseType Null
 | + "ByteString" Null
 | + "DOMString" Null
 | + "USVString" Null
 | + identifier Null
 | + "sequence" "<" Type ">" Null
 | + "object" Null
 | + "Error" Null
 | + "DOMException" Null
 | + BufferRelatedType Null
+
+ + + +
+

3.10.27 Error

+ +

The Error type corresponds to the + set of all possible non-null references to exception objects, including + simple exceptions + and DOMExceptions.

+

+ There is no way to represent a constant Error + value in IDL. +

+

+ The type name of the + Error type is “Error”. +

+
+ +
+

3.10.28 DOMException

+ +

The DOMException type corresponds to the + set of all possible non-null references to objects + representing DOMExceptions.

+

+ There is no way to represent a constant DOMException + value in IDL. +

+

+ The type name of the + DOMException type is “DOMException”. +

+
+ +
+

3.10.29 Buffer source types

+ +

+ There are a number of types that correspond to sets of all possible non-null + references to objects that represent a buffer of data or a view on to a buffer of + data. The table below lists these types and the kind of buffer or view they represent. +

+ + + + + + + + + + + + +
TypeKind of buffer
ArrayBufferAn object that holds a pointer (which may be null) to a buffer of a fixed number of bytes
DataViewA view on to an ArrayBuffer that allows typed access to integers and floating point values stored at arbitrary offsets into the buffer
+ Int8Array,
+ Int16Array,
+ Int32Array
A view on to an ArrayBuffer that exposes it as an array of two’s complement signed integers of the given size in bits
+ Uint8Array,
+ Uint16Array,
+ Uint32Array
A view on to an ArrayBuffer that exposes it as an array of unsigned integers of the given size in bits
Uint8ClampedArrayA view on to an ArrayBuffer that exposes it as an array of unsigned 8 bit integers with clamped conversions
+ Float32Array,
+ Float64Array
A view on to an ArrayBuffer that exposes it as an array of IEEE 754 floating point numbers of the given size in bits
+
Note
+

These types all correspond to classes defined in ECMAScript.

+
+

+ To detach an ArrayBuffer + is to set its buffer pointer to null. +

+

+ There is no way to represent a constant value of any of these types in IDL. +

+

+ The type name of all + of these types is the name of the type itself. +

+

+ At the specification prose level, IDL buffer source types + are simply references to objects. To inspect or manipulate the bytes inside the buffer, + specification prose MUST first either + get a reference to the bytes held by the buffer source + or get a copy of the bytes held by the buffer source. + With a reference to the buffer source’s bytes, specification prose can get or set individual + byte values using that reference. +

+
Warning
+

+ Extreme care must be taken when writing specification text that gets a reference + to the bytes held by a buffer source, as the underlying data can easily be changed + by the script author or other APIs at unpredictable times. If you are using a buffer source type + as an operation argument to obtain a chunk of binary data that will not be modified, + it is strongly recommended to get a copy of the buffer source’s bytes at the beginning + of the prose defining the operation. +

+

+ Requiring prose to explicitly get a reference to or copy of the bytes is intended to + help specification reviewers look for problematic uses of these buffer source types. +

+
+
Note
+

+ When designing APIs that take a buffer, it is recommended to use the + BufferSource typedef rather than ArrayBuffer + or any of the view types. +

+

+ When designing APIs that create and return a buffer, it is recommended + to use the ArrayBuffer type rather than + Uint8Array. +

+
+

+ Attempting to get a reference to or + get a copy of the bytes held by a buffer source + when the ArrayBuffer has been detached + will fail in a language binding-specific manner. +

+
Note
+

See section 4.2.30 below for + how interacting with buffer source types works in the ECMAScript language binding.

+
+ +
+ + +
+ +
+

3.11 Extended attributes

+ +

+ An extended attribute is an annotation + that can appear on + definitions, + interface members, + dictionary members, + and operation arguments, and + is used to control how language bindings will handle those constructs. + Extended attributes are specified with an + ExtendedAttributeList, + which is a square bracket enclosed, comma separated list of + ExtendedAttributes. +

+

+ The ExtendedAttribute + grammar symbol matches nearly any sequence of tokens, however the + extended attributes + defined in this document only accept a more restricted syntax. + Any extended attribute encountered in an + IDL fragment is + + matched against the following five grammar symbols to determine + which form (or forms) it is in: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Grammar symbolFormExample
+ ExtendedAttributeNoArgs + + takes no arguments + + [Replaceable] +
+ ExtendedAttributeArgList + + takes an argument list + + [Constructor(double x, double y)] +
+ ExtendedAttributeNamedArgList + + takes a named argument list + + [NamedConstructor=Image(DOMString src)] +
+ ExtendedAttributeIdent + + takes an identifier + + [PutForwards=name] +
+ ExtendedAttributeIdentList + + takes an identifier list + + [Exposed=(Window,Worker)] +
+ +

+ This specification defines a number of extended attributes that + are applicable to the ECMAScript language binding, which are described in + section 4.3. + Each extended attribute definition will state which of the above + five forms are allowed. +

+ +
[65]ExtendedAttributeList"[" ExtendedAttribute ExtendedAttributes "]"
 | + ε
[66]ExtendedAttributes"," ExtendedAttribute ExtendedAttributes
 | + ε
[67]ExtendedAttribute + "(" ExtendedAttributeInner ")" ExtendedAttributeRest +
 | + "[" ExtendedAttributeInner "]" ExtendedAttributeRest +
 | + "{" ExtendedAttributeInner "}" ExtendedAttributeRest +
 | + Other ExtendedAttributeRest +
[68]ExtendedAttributeRestExtendedAttribute
 | + ε
[69]ExtendedAttributeInner + "(" ExtendedAttributeInner ")" ExtendedAttributeInner +
 | + "[" ExtendedAttributeInner "]" ExtendedAttributeInner +
 | + "{" ExtendedAttributeInner "}" ExtendedAttributeInner +
 | + OtherOrComma ExtendedAttributeInner +
 | + ε +
[70]Other + integer
 | + float
 | + identifier
 | + string
 | + other +
 | + "-"
 | + "-Infinity"
 | + "."
 | + "..."
 | + ":"
 | + ";"
 | + "<"
 | + "="
 | + ">"
 | + "?" +
 | + "ByteString"
 | + "DOMString"
 | + "Infinity"
 | + "NaN"
 | + "USVString"
 | + "any"
 | + "boolean"
 | + "byte"
 | + "double"
 | + "false"
 | + "float" +
 | + "long"
 | + "null"
 | + "object"
 | + "octet"
 | + "or"
 | + "optional"
 | + "sequence" +
 | + "short"
 | + "true"
 | + "unsigned"
 | + "void" +
 | + ArgumentNameKeyword +
 | + BufferRelatedType +
[72]OtherOrCommaOther
 | + ","
[90]IdentifierListidentifier Identifiers
[91]Identifiers"," identifier Identifiers
 | + ε
[92]ExtendedAttributeNoArgsidentifier
[93]ExtendedAttributeArgListidentifier "(" ArgumentList ")"
[94]ExtendedAttributeIdentidentifier "=" identifier
[95]ExtendedAttributeIdentListidentifier "=" "(" IdentifierList ")"
[96]ExtendedAttributeNamedArgListidentifier "=" identifier "(" ArgumentList ")"
+ +
+
+ +
+

4. ECMAScript binding

+ +

+ This section describes how definitions written with the IDL defined in + section 3. correspond to particular constructs + in ECMAScript, as defined by the ECMAScript Language Specification 6th Edition + [ECMA-262]. +

+

+ Objects defined in this section have internal properties as described in + ECMA-262 sections 9.1 and + 9.3.1 unless otherwise specified, in which case one or + more of the following are redefined in accordance with the rules for exotic objects: + [[Call]], + [[Set]], + [[DefineOwnProperty]], + [[GetOwnProperty]], + [[Delete]] and + [[HasInstance]]. +

+

+ Unless otherwise specified, the [[Extensible]] internal property + of objects defined in this section has the value true. +

+

+ Unless otherwise specified, the [[Prototype]] internal property + of objects defined in this section is the Object prototype object. +

+

+ Some objects described in this section are defined to have a class string, + which is the string to include in the string returned from Object.prototype.toString. + If an object has a class string, then the object MUST, + at the time it is created, have a property whose name is the @@toStringTag symbol + and whose value is the specified string. +

+ +

+ If an object is defined to be a function object, then + it has characteristics as follows: +

+ + +

+ Algorithms in this section use the conventions described in ECMA-262 + section 5.2, such as the use of steps and substeps, the use of mathematical + operations, and so on. The + ToBoolean, + ToNumber, + ToUint16, + ToInt32, + ToUint32, + ToString, + ToObject, + IsAccessorDescriptor and + IsDataDescriptor abstract operations and the + Type(x) + notation referenced in this section are defined in ECMA-262 sections 6 and 7. +

+

+ When an algorithm says to “throw a SomethingError” then this means to + construct a new ECMAScript SomethingError object and to throw it, + just as the algorithms in ECMA-262 do. +

+

+ Note that algorithm steps can call in to other algorithms and abstract operations and + not explicitly handle exceptions that are thrown from them. When an exception + is thrown by an algorithm or abstract operation and it is not explicitly + handled by the caller, then it is taken to end the algorithm and propagate out + to its caller, and so on. +

+
Example
+

+ Consider the following algorithm: +

+
    +
  1. Let x be the ECMAScript value passed in to this algorithm.
  2. +
  3. Let y be the result of calling ToString(x).
  4. +
  5. Return y.
  6. +
+

+ Since ToString can throw an exception (for example if passed the object + ({ toString: function() { throw 1 } })), and the exception is + not handled in the above algorithm, if one is thrown then it causes this + algorithm to end and for the exception to propagate out to its caller, if there + is one. +

+
+ +
+

4.1 ECMAScript environment

+

+ In an ECMAScript implementation of a given set of + IDL fragments, + there will exist a number of ECMAScript objects that correspond to + definitions in those IDL fragments. + These objects are termed the initial objects, + and comprise the following: +

+ +

+ Each ECMAScript global environment ([ECMA-262], section 8.2) + MUST have its own unique set of each of + the initial objects, created + before control enters any ECMAScript execution context associated with the + environment, but after the global object for that environment is created. The [[Prototype]]s + of all initial objects in a given global environment MUST come from + that same global environment. +

+
Example
+

+ In an HTML user agent, multiple global environments can exist when + multiple frames or windows are created. Each frame or window will have + its own set of initial objects, + which the following HTML document demonstrates: +

+
HTML
<!DOCTYPE html>
+<title>Different global environments</title>
+<iframe id=a></iframe>
+<script>
+var iframe = document.getElementById("a");
+var w = iframe.contentWindow;              // The global object in the frame
+
+Object == w.Object;                        // Evaluates to false, per ECMA-262
+Node == w.Node;                            // Evaluates to false
+iframe instanceof w.Node;                  // Evaluates to false
+iframe instanceof w.Object;                // Evaluates to false
+iframe.appendChild instanceof Function;    // Evaluates to true
+iframe.appendChild instanceof w.Function;  // Evaluates to false
+</script>
+
+

+ Unless otherwise specified, each ECMAScript global environment exposes + all interfaces + that the implementation supports. If a given ECMAScript global environment does not + expose an interface, then the requirements given in + section 4.5 are + not followed for that interface. +

+
Note
+

+ This allows, for example, ECMAScript global environments for Web Workers to expose + different sets of supported interfaces from those exposed in environments + for Web pages. +

+
+
+ +
+

4.2 ECMAScript type mapping

+ +

+ This section describes how types in the IDL map to types in ECMAScript. +

+

+ Each sub-section below describes how values of a given IDL type are represented + in ECMAScript. For each IDL type, it is described how ECMAScript values are + converted to an IDL value + when passed to a platform object expecting that type, and how IDL values + of that type are converted to ECMAScript values + when returned from a platform object. +

+ +
+

4.2.1 any

+ +

+ Since the IDL any type + is the union of all other IDL types, it can correspond to any + ECMAScript value type. +

+

+ How to convert an ECMAScript value to an IDL any value depends on the type of the + ECMAScript value: +

+
+
The undefined value
+
+ The IDL value is an + object reference + to a special object that represents the ECMAScript + undefined value. +
+
The null value
+
+ The IDL value is the null + object? reference. +
+
A Boolean value
+
+ The IDL value is the + boolean + value that represents the same truth value. +
+
A Number value
+
+ The IDL value is that which is obtained + by following the rules for converting the + Number to an IDL + unrestricted double value, + as described in section 4.2.15, + below. +
+
A String value
+
+ The IDL value is that which is obtained + by following the rules for converting the + String to an IDL + DOMString value, + as described in section 4.2.16, + below. +
+
An object value
+
+ The IDL value is an + object value that + references the same object. +
+
+

+ An IDL any value is + converted to an ECMAScript value + as follows. If the value is an object + reference to a special object that represents an ECMAScript undefined + value, then it is converted to the ECMAScript + undefined value. Otherwise, + the rules for converting the specific type + of the IDL any value + as described in the remainder of this section are performed. +

+
+ +
+

4.2.2 void

+ +

+ The only place that the void type may appear + in IDL is as the return type of an + operation. Functions on platform objects + that implement an operation whose IDL specifies a + void return type MUST return the + undefined value. +

+

+ ECMAScript functions that implement an operation whose IDL + specifies a void return type + MAY return any value, which will be discarded. +

+
+ +
+

4.2.3 boolean

+ +

+ An ECMAScript value V is + converted + to an IDL boolean value + by running the following algorithm: +

+
    +
  1. Let x be the result of computing ToBoolean(V).
  2. +
  3. Return the IDL boolean value that is the one that represents the same truth value as the ECMAScript Boolean value x.
  4. +
+

+ The IDL boolean value true + is converted to + the ECMAScript true value and the IDL boolean + value false is converted to the ECMAScript + false value. +

+
+ +
+

4.2.4 byte

+ +

+ An ECMAScript value V is + converted + to an IDL byte value + by running the following algorithm: +

+
    +
  1. Initialize x to ToNumber(V).
  2. +
  3. If the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. +
    3. Set x to sign(x) * floor(abs(x)).
    4. +
    5. If x < −2⁷ or x > 2⁷ − 1, then throw a TypeError.
    6. +
    7. Return the IDL byte value that represents the same numeric value as x.
    8. +
    +
  4. +
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. Set x to min(max(x, −2⁷), 2⁷ − 1).
    2. +
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. +
    5. Return the IDL byte value that represents the same numeric value as x.
    6. +
    +
  6. +
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL byte value that represents 0.
  8. +
  9. Set x to sign(x) * floor(abs(x)).
  10. +
  11. Set x to x modulo 2⁸.
  12. +
  13. If x ≥ 2⁷, return the IDL byte value that represents the same numeric value as x − 2⁸. + Otherwise, return the IDL byte value that represents the same numeric value as x.
  14. +
+

+ The result of converting + an IDL byte value to an ECMAScript + value is a Number that represents + the same numeric value as the IDL byte value. + The Number value will be an integer in the range [−128, 127]. +

+
+ +
+

4.2.5 octet

+ +

+ An ECMAScript value V is + converted + to an IDL octet value + by running the following algorithm: +

+
    +
  1. Initialize x to ToNumber(V).
  2. +
  3. If the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. +
    3. Set x to sign(x) * floor(abs(x)).
    4. +
    5. If x < 0 or x > 2⁸ − 1, then throw a TypeError.
    6. +
    7. Return the IDL octet value that represents the same numeric value as x.
    8. +
    +
  4. +
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. Set x to min(max(x, 0), 2⁸ − 1).
    2. +
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. +
    5. Return the IDL octet value that represents the same numeric value as x.
    6. +
    +
  6. +
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL octet value that represents 0.
  8. +
  9. Set x to sign(x) * floor(abs(x)).
  10. +
  11. Set x to x modulo 2⁸.
  12. +
  13. Return the IDL octet value that represents the same numeric value as x.
  14. +
+

+ The result of converting + an IDL octet value to an ECMAScript + value is a Number that represents + the same numeric value as the IDL + octet value. + The Number value will be an integer in the range [0, 255]. +

+
+ +
+

4.2.6 short

+ +

+ An ECMAScript value V is + converted + to an IDL short value + by running the following algorithm: +

+
    +
  1. Initialize x to ToNumber(V).
  2. +
  3. If the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. +
    3. Set x to sign(x) * floor(abs(x)).
    4. +
    5. If x < −2¹⁵ or x > 2¹⁵ − 1, then throw a TypeError.
    6. +
    7. Return the IDL short value that represents the same numeric value as x.
    8. +
    +
  4. +
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. Set x to min(max(x, −2¹⁵), 2¹⁵ − 1).
    2. +
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. +
    5. Return the IDL short value that represents the same numeric value as x.
    6. +
    +
  6. +
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL short value that represents 0.
  8. +
  9. Set x to sign(x) * floor(abs(x)).
  10. +
  11. Set x to x modulo 2¹⁶.
  12. +
  13. If x ≥ 2¹⁵, return the IDL short value that represents the same numeric value as x − 2¹⁶. + Otherwise, return the IDL short value that represents the same numeric value as x.
  14. +
+

+ The result of converting + an IDL short value to an ECMAScript + value is a Number that represents the + same numeric value as the IDL + short value. + The Number value will be an integer in the range [−32768, 32767]. +

+
+ +
+

4.2.7 unsigned short

+ +

+ An ECMAScript value V is + converted + to an IDL unsigned short value + by running the following algorithm: +

+
    +
  1. Initialize x to ToNumber(V).
  2. +
  3. If the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. +
    3. Set x to sign(x) * floor(abs(x)).
    4. +
    5. If x < 0 or x > 2¹⁶ − 1, then throw a TypeError.
    6. +
    7. Return the IDL unsigned short value that represents the same numeric value as x.
    8. +
    +
  4. +
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. Set x to min(max(x, 0), 2¹⁶ − 1).
    2. +
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. +
    5. Return the IDL unsigned short value that represents the same numeric value as x.
    6. +
    +
  6. +
  7. Set x to ToUint16(x).
  8. +
  9. Return the IDL unsigned short value that represents the same numeric value as x.
  10. +
+

+ The result of converting + an IDL unsigned short value to an ECMAScript + value is a Number that + represents the same numeric value as the IDL + unsigned short value. + The Number value will be an integer in the range [0, 65535]. +

+
+ +
+

4.2.8 long

+ +

+ An ECMAScript value V is + converted + to an IDL long value + by running the following algorithm: +

+
    +
  1. Initialize x to ToNumber(V).
  2. +
  3. If the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. +
    3. Set x to sign(x) * floor(abs(x)).
    4. +
    5. If x < −2³¹ or x > 2³¹ − 1, then throw a TypeError.
    6. +
    7. Return the IDL long value that represents the same numeric value as x.
    8. +
    +
  4. +
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. Set x to min(max(x, −2³¹), 2³¹ − 1).
    2. +
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. +
    5. Return the IDL long value that represents the same numeric value as x.
    6. +
    +
  6. +
  7. Set x to ToInt32(x).
  8. +
  9. Return the IDL long value that represents the same numeric value as x.
  10. +
+

+ The result of converting + an IDL long value to an ECMAScript + value is a Number that + represents the same numeric value as the IDL + long value. + The Number value will be an integer in the range [−2147483648, 2147483647]. +

+
+ +
+

4.2.9 unsigned long

+ +

+ An ECMAScript value V is + converted + to an IDL unsigned long value + by running the following algorithm: +

+
    +
  1. Initialize x to ToNumber(V).
  2. +
  3. If the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. +
    3. Set x to sign(x) * floor(abs(x)).
    4. +
    5. If x < 0 or x > 2³² − 1, then throw a TypeError.
    6. +
    7. Return the IDL unsigned long value that represents the same numeric value as x.
    8. +
    +
  4. +
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. Set x to min(max(x, 0), 2³² − 1).
    2. +
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. +
    5. Return the IDL unsigned long value that represents the same numeric value as x.
    6. +
    +
  6. +
  7. Set x to ToUint32(x).
  8. +
  9. Return the IDL unsigned long value that represents the same numeric value as x.
  10. +
+

+ The result of converting + an IDL unsigned long value to an ECMAScript + value is a Number that + represents the same numeric value as the IDL + unsigned long value. + The Number value will be an integer in the range [0, 4294967295]. +

+
+ +
+

4.2.10 long long

+ +

+ An ECMAScript value V is + converted + to an IDL long long value + by running the following algorithm: +

+
    +
  1. Initialize x to ToNumber(V).
  2. +
  3. If the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. +
    3. Set x to sign(x) * floor(abs(x)).
    4. +
    5. If x < −2⁵³ + 1 or x > 2⁵³ − 1, then throw a TypeError.
    6. +
    7. Return the IDL long long value that represents the same numeric value as x.
    8. +
    +
  4. +
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. Set x to min(max(x, −2⁵³ + 1), 2⁵³ − 1).
    2. +
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. +
    5. Return the IDL long long value that represents the same numeric value as x.
    6. +
    +
  6. +
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL long long value that represents 0.
  8. +
  9. Set x to sign(x) * floor(abs(x)).
  10. +
  11. Set x to x modulo 2⁶⁴.
  12. +
  13. If x is greater than or equal to 2⁶³, then set x to x − 2⁶⁴.
  14. +
  15. Return the IDL long long value that represents the same numeric value as x.
  16. +
+

+ The result of converting + an IDL long long value to an ECMAScript + value is a Number value that + represents the closest numeric value to the long long, + choosing the numeric value with an even significand if there are + two equally close values ( + [ECMA-262] + , section 6.1.6). + If the long long is in the range + [−2⁵³ + 1, 2⁵³ − 1], then the Number + will be able to represent exactly the same value as the + long long. +

+
+ +
+

4.2.11 unsigned long long

+ +

+ An ECMAScript value V is + converted + to an IDL unsigned long long value + by running the following algorithm: +

+
    +
  1. Initialize x to ToNumber(V).
  2. +
  3. If the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. +
    3. Set x to sign(x) * floor(abs(x)).
    4. +
    5. If x < 0 or x > 2⁵³ − 1, then throw a TypeError.
    6. +
    7. Return the IDL unsigned long long value that represents the same numeric value as x.
    8. +
    +
  4. +
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: + + then: +
      +
    1. Set x to min(max(x, 0), 2⁵³ − 1).
    2. +
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. +
    5. Return the IDL unsigned long long value that represents the same numeric value as x.
    6. +
    +
  6. +
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL unsigned long long value that represents 0.
  8. +
  9. Set x to sign(x) * floor(abs(x)).
  10. +
  11. Set x to x modulo 2⁶⁴.
  12. +
  13. Return the IDL unsigned long long value that represents the same numeric value as x.
  14. +
+

+ The result of converting + an IDL unsigned long long value to an ECMAScript + value is a Number value that + represents the closest numeric value to the unsigned long long, + choosing the numeric value with an even significand if there are + two equally close values ( + [ECMA-262] + , section 6.1.6). + If the unsigned long long is less than or equal to 2⁵³ − 1, + then the Number will be able to + represent exactly the same value as the + unsigned long long. +

+
+ +
+

4.2.12 float

+ +

+ An ECMAScript value V is + converted + to an IDL float value + by running the following algorithm: +

+
    +
  1. Let x be ToNumber(V).
  2. +
  3. If x is NaN, +Infinity or + −Infinity, then throw a TypeError.
  4. +
  5. + Let S be the set of finite IEEE 754 single-precision floating + point values except −0, but with two special values added: 2¹²⁸ and + −2¹²⁸. +
  6. +
  7. + Let y be the number in S that is closest + to x, selecting the number with an + even significand if there are two equally close values ( + [ECMA-262] + , section 6.1.6). + (The two special values 2¹²⁸ and −2¹²⁸ + are considered to have even significands for this purpose.) +
  8. +
  9. + If y is 2¹²⁸ or −2¹²⁸, then throw a TypeError. +
  10. +
  11. + If y is +0 and x is negative, return −0. +
  12. +
  13. + Return y. +
  14. +
+

+ The result of converting + an IDL float value to an ECMAScript + value is the Number value that represents the same numeric value as the IDL + float value. +

+
+ +
+

4.2.13 unrestricted float

+ +

+ An ECMAScript value V is + converted + to an IDL unrestricted float value + by running the following algorithm: +

+
    +
  1. Let x be ToNumber(V).
  2. +
  3. If x is NaN, then return the IDL unrestricted float value that represents the IEEE 754 NaN value with the bit pattern 0x7fc00000 [IEEE-754].
  4. +
  5. + Let S be the set of finite IEEE 754 single-precision floating + point values except −0, but with two special values added: 2¹²⁸ and + −2¹²⁸. +
  6. +
  7. + Let y be the number in S that is closest + to x, selecting the number with an + even significand if there are two equally close values ( + [ECMA-262] + , section 6.1.6). + (The two special values 2¹²⁸ and −2¹²⁸ + are considered to have even significands for this purpose.) +
  8. +
  9. + If y is 2¹²⁸, return +∞. +
  10. +
  11. + If y is −2¹²⁸, return −∞. +
  12. +
  13. + If y is +0 and x is negative, return −0. +
  14. +
  15. + Return y. +
  16. +
+
Note
+

+ Since there is only a single ECMAScript NaN value, + it must be canonicalized to a particular single precision IEEE 754 NaN value. The NaN value + mentioned above is chosen simply because it is the quiet NaN with the lowest + value when its bit pattern is interpreted as an unsigned 32 bit integer. +

+
+

+ The result of converting + an IDL unrestricted float value to an ECMAScript + value is a Number: +

+
    +
  • + If the IDL unrestricted float value is a NaN, + then the Number value is NaN. +
  • +
  • + Otherwise, the Number value is + the one that represents the same numeric value as the IDL + unrestricted float value. +
  • +
+
+ +
+

4.2.14 double

+ +

+ An ECMAScript value V is + converted + to an IDL double value + by running the following algorithm: +

+
    +
  1. Let x be ToNumber(V).
  2. +
  3. If x is NaN, +Infinity or + −Infinity, then throw a TypeError.
  4. +
  5. + Return the IDL double value + that has the same numeric value as x. +
  6. +
+

+ The result of converting + an IDL double value to an ECMAScript + value is the Number value that represents the + same numeric value as the IDL double value. +

+
+ +
+

4.2.15 unrestricted double

+ +

+ An ECMAScript value V is + converted + to an IDL unrestricted double value + by running the following algorithm: +

+
    +
  1. Let x be ToNumber(V).
  2. +
  3. If x is NaN, then return the IDL unrestricted double value that represents the IEEE 754 NaN value with the bit pattern 0x7ff8000000000000 [IEEE-754].
  4. +
  5. + Return the IDL unrestricted double value + that has the same numeric value as x. +
  6. +
+
Note
+

+ Since there is only a single ECMAScript NaN value, + it must be canonicalized to a particular double precision IEEE 754 NaN value. The NaN value + mentioned above is chosen simply because it is the quiet NaN with the lowest + value when its bit pattern is interpreted as an unsigned 64 bit integer. +

+
+

+ The result of converting + an IDL unrestricted double value to an ECMAScript + value is a Number: +

+
    +
  • + If the IDL unrestricted double value is a NaN, + then the Number value is NaN. +
  • +
  • + Otherwise, the Number value is + the one that represents the same numeric value as the IDL + unrestricted double value. +
  • +
+
+ +
+

4.2.16 DOMString

+ +

+ An ECMAScript value V is + converted + to an IDL DOMString value + by running the following algorithm: +

+
    +
  1. If V is null + and the conversion to an IDL value is being performed due + to any of the following: + + then return the DOMString + value that represents the empty string. +
  2. +
  3. Let x be ToString(V).
  4. +
  5. Return the IDL DOMString value that represents the same sequence of code units as the one the ECMAScript String value x represents.
  6. +
+

+ The result of converting + an IDL DOMString value to an ECMAScript + value is the String + value that represents the same sequence of code units that the + IDL DOMString represents. +

+
+ +
+

4.2.17 ByteString

+ +

+ An ECMAScript value V is + converted + to an IDL ByteString value + by running the following algorithm: +

+
    +
  1. Let x be ToString(V).
  2. +
  3. If the value of any element + of x is greater than 255, then throw a TypeError.
  4. +
  5. Return an IDL ByteString value + whose length is the length of x, and where the value of each element is + the value of the corresponding element of x.
  6. +
+

+ The result of converting + an IDL ByteString value to an ECMAScript + value is a String + value whose length is the length of the ByteString, + and the value of each element of which is the value of the corresponding element + of the ByteString. +

+
+ +
+

4.2.18 USVString

+ +

+ An ECMAScript value V is + converted + to an IDL USVString value + by running the following algorithm: +

+
    +
  1. Let string be the result of converting V + to a DOMString.
  2. +
  3. Return an IDL USVString value + that is the result of converting string to a sequence of Unicode scalar values.
  4. +
+

+ An IDL USVString value is + converted + to an ECMAScript value by running the following algorithm: +

+
    +
  1. Let scalarValues be the sequence of Unicode scalar values the USVString represents.
  2. +
  3. Let string be the sequence of code units that results from encoding scalarValues in UTF-16.
  4. +
  5. Return the String value that represents the same sequence of code units as string.
  6. +
+
+ +
+

4.2.19 object

+ +

+ IDL object + values are represented by ECMAScript Object values. +

+

+ An ECMAScript value V is + converted + to an IDL object value + by running the following algorithm: +

+
    +
  1. If Type(V) is not Object, then throw a TypeError.
  2. +
  3. Return the IDL object value that is a reference to the same object as V.
  4. +
+

+ The result of converting + an IDL object value to an ECMAScript + value is the Object value that represents a reference to the same object that the + IDL object represents. +

+
+ +
+

4.2.20 Interface types

+ +

+ IDL interface type + values are represented by ECMAScript Object or + Function values. +

+

+ An ECMAScript value V is + converted + to an IDL interface type value + by running the following algorithm (where I is the interface): +

+
    +
  1. If Type(V) is not Object, then throw a TypeError.
  2. +
  3. If V is a platform object that implements I, then return the IDL interface type value that represents a reference to that platform object.
  4. +
  5. If V is a user object + that is considered to implement I according to the rules in + section 4.8 , + then return the IDL interface type value that represents a reference to that user object, + with the incumbent script + as the callback context. [HTML]
  6. +
  7. Throw a TypeError.
  8. +
+

+ The result of converting + an IDL interface type + value to an ECMAScript value is the Object + value that represents a reference to the same object that the IDL + interface type value represents. +

+
+ +
+

4.2.21 Dictionary types

+ +

+ IDL dictionary type values are represented + by ECMAScript Object values. Properties on + the object (or its prototype chain) correspond to dictionary members. +

+

+ An ECMAScript value V is + converted + to an IDL dictionary type value + by running the following algorithm (where D is the dictionary): +

+
    +
  1. If Type(V) is not Undefined, Null or Object, then throw a TypeError.
  2. +
  3. If V is a native RegExp object, then throw a TypeError.
  4. +
  5. Let dict be an empty dictionary value of type D; + every dictionary member + is initially considered to be not present.
  6. +
  7. Let dictionaries be a list consisting of D and all of D’s inherited dictionaries, + in order from least to most derived.
  8. +
  9. For each dictionary dictionary in dictionaries, in order: +
      +
    1. For each dictionary member member declared on dictionary, in lexicographical order: +
        +
      1. Let key be the identifier of member.
      2. +
      3. Let value be an ECMAScript value, depending on Type(V): +
        +
        Undefined
        +
        Null
        +
        value is undefined.
        +
        anything else
        +
        value is the result of calling the [[Get]] internal method on V with property name key.
        +
        +
      4. +
      5. If value is not undefined, then: +
          +
        1. Let idlValue be the result of converting value to an IDL value whose type is the type member is declared to be of.
        2. +
        3. Set the dictionary member on dict with key name key to the value idlValue. This dictionary member is considered to be present.
        4. +
        +
      6. +
      7. Otherwise, if value is undefined but the dictionary member has a default value, then: +
          +
        1. Let idlValue be the dictionary member’s default value.
        2. +
        3. Set the dictionary member on dict with key name key to the value idlValue. This dictionary member is considered to be present.
        4. +
        +
      8. +
      9. + Otherwise, if value is + undefined and the + dictionary + member is a + required dictionary + member, then throw a TypeError. +
      10. +
      +
    2. +
    +
  10. +
  11. Return dict.
  12. +
+
Note
+

+ The order in which dictionary members are looked + up on the ECMAScript object is not necessarily the same as the object’s property enumeration order. +

+
+

+ An IDL dictionary value V is + converted + to an ECMAScript Object value + by running the following algorithm (where D is the dictionary): +

+
    +
  1. Let O be a new Object value created as if by the expression ({}).
  2. +
  3. Let dictionaries be a list consisting of D and all of D’s inherited dictionaries, + in order from least to most derived.
  4. +
  5. For each dictionary dictionary in dictionaries, in order: +
      +
    1. For each dictionary member member declared on dictionary, in lexicographical order: +
        +
      1. Let key be the identifier of member.
      2. +
      3. If the dictionary member named key is present in V, then: +
          +
        1. Let idlValue be the value of member on V.
        2. +
        3. Let value be the result of converting idlValue to an ECMAScript value.
        4. +
        5. Call the [[DefineOwnProperty]] internal method on O with property name key, + Property Descriptor { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true, [[Value]]: value } + and Boolean flag false.
        6. +
        +
      4. +
      +
    2. +
    +
  6. +
  7. Return O.
  8. +
+
+ +
+

4.2.22 Enumeration types

+ +

+ IDL enumeration types are represented by ECMAScript String + values. +

+

+ An ECMAScript value V is + converted + to an IDL enumeration type + value as follows (where E is the enumeration): +

+
    +
  1. Let S be the result of calling ToString(V).
  2. +
  3. If S is not one of E’s enumeration values, then throw a TypeError.
  4. +
  5. Return the enumeration value of type E that is equal to S.
  6. +
+

+ The result of converting + an IDL enumeration type value to an ECMAScript + value is the String + value that represents the same sequence of code units as + the enumeration value. +

+
+ +
+

4.2.23 Callback function types

+ +

+ IDL callback function types are represented by ECMAScript Function + objects, except in the [TreatNonObjectAsNull] case, + when they can be any object. +

+

+ An ECMAScript value V is + converted + to an IDL callback function type value + by running the following algorithm: +

+
    +
  1. If the result of calling IsCallable(V) is false and the conversion to an IDL value + is not being performed due + to V being assigned to an attribute + whose type is a nullable + callback function + that is annotated with [TreatNonObjectAsNull], + then throw a TypeError.
  2. +
  3. Return the IDL callback + function type value that represents a reference to the same + object that V represents, + with the incumbent script + as the callback context. [HTML].
  4. +
+

+ The result of converting + an IDL callback function type + value to an ECMAScript value is a reference to the same object + that the IDL callback function type value represents. +

+
+ +
+

4.2.24 Nullable types — T?

+ +

+ IDL nullable type values are represented + by values of either the ECMAScript type corresponding to the inner IDL type, or + the ECMAScript null value. +

+

+ An ECMAScript value V is + converted + to an IDL nullable type T? + value (where T is the inner type) as follows: +

+
    +
  1. + If Type(V) is not Object, and + the conversion to an IDL value is being performed due + to V being assigned to an attribute + whose type is a nullable + callback function + that is annotated with [TreatNonObjectAsNull], + then return the IDL + nullable type T? + value null. +
  2. +
  3. + Otherwise, if V is null or undefined, then return the IDL + nullable type T? + value null. +
  4. +
  5. + Otherwise, return the result of + converting V + using the rules for the inner IDL type T. +
  6. +
+

+ The result of converting + an IDL nullable type value to an ECMAScript value is: +

+ +
+ +
+

4.2.25 Sequences — sequence<T>

+ +

+ IDL sequence<T> values are represented by + ECMAScript Array values. +

+

+ An ECMAScript value V is converted + to an IDL sequence<T> value as follows: +

+
    +
  1. + If V is not an object, + throw a + TypeError. +
  2. +
  3. + If V is a native RegExp object, + throw a + TypeError. +
  4. +
  5. + Let method be the result of + GetMethod(V, @@iterator). +
  6. +
  7. + ReturnIfAbrupt(method). +
  8. +
  9. + If method is undefined, + throw a + TypeError. +
  10. +
  11. + Return the result of + creating a sequence + of type sequence<T> + from V and method. +
  12. +
+ +

+ An IDL sequence value S of type + sequence<T> is + converted + to an ECMAScript Array object as follows: +

+
    +
  1. Let n be the length of S.
  2. +
  3. Let A be a new Array object created as if by the expression [].
  4. +
  5. Initialize i to be 0.
  6. +
  7. While i < n: +
      +
    1. Let V be the value in S at index i.
    2. +
    3. Let E be the result of converting + V to an ECMAScript value.
    4. +
    5. Let P be the result of calling ToString(i).
    6. +
    7. Call the [[DefineOwnProperty]] internal method on A with property name P, + Property Descriptor { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true, [[Value]]: E } + and Boolean flag false.
    8. +
    9. Set i to i + 1.
    10. +
    +
  8. +
  9. Return A.
  10. +
+ +
+
4.2.25.1 Creating a sequence from an iterable
+

+ To create an IDL value of type sequence<T> given an + iterable iterable and an iterator getter + method, perform the following steps: +

+
    +
  1. + Let iter be + GetIterator(iterable, method). +
  2. +
  3. + ReturnIfAbrupt(iter). +
  4. +
  5. Initialize i to be 0.
  6. +
  7. Repeat +
      +
    1. + Let next be IteratorStep(iter). +
    2. +
    3. ReturnIfAbrupt(next).
    4. +
    5. + If next is false, + then return an IDL sequence value of type + sequence<T> + of length i, where the value of the element + at index j is + Sⱼ. +
    6. +
    7. + Let nextItem be + IteratorValue(next). +
    8. +
    9. ReturnIfAbrupt(nextItem).
    10. +
    11. + Initialize Sᵢ to the result of + converting + nextItem to an IDL value of type T. +
    12. +
    13. Set i to i + 1.
    14. +
    +
  8. +
+
+
Example
+

+ The following interface defines + an attribute of a sequence + type as well as an operation + with an argument of a sequence type. +

+
IDL
interface Canvas {
+
+  sequence<DOMString> getSupportedImageCodecs();
+
+  void drawPolygon(sequence<double> coordinates);
+  sequence<double> getLastDrawnPolygon();
+
+  // ...
+};
+

+ In an ECMAScript implementation of this interface, an Array + object with elements of type String is used to + represent a sequence<DOMString>, while an + Array with elements of type Number + represents a sequence<double>. The + Array objects are effectively passed by + value; every time the getSupportedImageCodecs() + function is called a new Array is + returned, and whenever an Array is + passed to drawPolygon no reference + will be kept after the call completes. +

+
ECMAScript

+// Obtain an instance of Canvas.  Assume that getSupportedImageCodecs()
+// returns a sequence with two DOMString values: "image/png" and "image/svg+xml".
+var canvas = getCanvas();
+
+// An Array object of length 2.
+var supportedImageCodecs = canvas.getSupportedImageCodecs();
+
+// Evaluates to "image/png".
+supportedImageCodecs[0];
+
+// Each time canvas.getSupportedImageCodecs() is called, it returns a
+// new Array object.  Thus modifying the returned Array will not
+// affect the value returned from a subsequent call to the function.
+supportedImageCodecs[0] = "image/jpeg";
+
+// Evaluates to "image/png".
+canvas.getSupportedImageCodecs()[0];
+
+// This evaluates to false, since a new Array object is returned each call.
+canvas.getSupportedImageCodecs() == canvas.getSupportedImageCodecs();
+
+
+// An Array of Numbers...
+var a = [0, 0, 100, 0, 50, 62.5];
+
+// ...can be passed to a platform object expecting a sequence<double>.
+canvas.drawPolygon(a);
+
+// Each element will be converted to a double by first calling ToNumber().
+// So the following call is equivalent to the previous one, except that
+// "hi" will be alerted before drawPolygon() returns.
+a = [false, '',
+     { valueOf: function() { alert('hi'); return 100; } }, 0,
+     '50', new Number(62.5)];
+canvas.drawPolygon(a);
+
+// Modifying an Array that was passed to drawPolygon() is guaranteed not to
+// have an effect on the Canvas, since the Array is effectively passed by value.
+a[4] = 20;
+var b = canvas.getLastDrawnPolygon();
+alert(b[4]);    // This would alert "50".
+
+ +
+ +
+

4.2.26 Promise types — Promise<T>

+ +

+ IDL promise type values are + represented by ECMAScript Promise + objects. +

+

+ An ECMAScript value V is + converted + to an IDL Promise<T> value as follows: +

+
    +
  1. Let resolve be the original value of %Promise%.resolve. + +
  2. +
  3. Let promise be the result of calling resolve with %Promise% + as the this value and V as the single argument value.
  4. +
  5. Return the IDL promise type value that is a reference to the + same object as promise.
  6. +
+

+ The result of converting + an IDL promise type value to an ECMAScript + value is the Promise value that represents a reference to the same object that the + IDL promise type represents. +

+

+ One can perform some steps once a promise is settled. + There can be one or two sets of steps to perform, covering when the promise is fulfilled, rejected, or both. + When a specification says to perform some steps once a promise is settled, the following steps + MUST be followed: +

+
    +
  1. Let promise be the promise object of type Promise<T>.
  2. +
  3. + Let onFulfilled be a new function object whose + behavior when invoked is as follows: +
      +
    1. If T is void, then: +
      1. Return the result of performing any steps that were required to be run if the promise was fulfilled.
    2. +
    3. Otherwise, T is a type other than void: +
        +
      1. Let V be the first argument to onFulfilled.
      2. +
      3. Let value be the result of converting + V to an IDL value of type T.
      4. +
      5. If there are no steps that are required to be run if the promise was fulfilled, then + return undefined.
      6. +
      7. Otherwise, return the result of performing any steps that were required to be run if the promise was fulfilled, + with value as the promise’s value.
      8. +
      +
    4. +
    +
  4. +
  5. + Let onRejected be a new function object whose + behavior when invoked is as follows: +
      +
    1. Let R be the first argument to onRejected.
    2. +
    3. Let reason be the result of converting + R to an IDL value of type any.
    4. +
    5. If there are no steps that are required to be run if the promise was rejected, then + return undefined.
    6. +
    7. Otherwise, return the result of performing any steps that were required to be run if the promise was rejected, + with reason as the rejection reason.
    8. +
    +
  6. +
  7. Let then be the result of calling the internal [[Get]] method of promise with property name “then”.
  8. +
  9. If then is not callable, then throw a TypeError.
  10. +
  11. Return the result of calling then with promise as the this value and onFulfilled and onRejected + as its two arguments.
  12. +
+ +
+ +
+

4.2.27 Union types

+ +

+ IDL union type values are + represented by ECMAScript values that correspond to the union’s + member types. +

+

+ To convert an ECMAScript value V to an IDL union type + value is done as follows: +

+
    +
  1. If the union type + includes a nullable type and + V is null or undefined, + then return the IDL value null.
  2. +
  3. + Let types be the flattened member types + of the union type. +
  4. +
  5. + If V is a platform object, but not a + platform array object, then: +
      +
    1. If types includes an interface type that V + implements, then return the IDL value that is a reference to the object V.
    2. +
    3. If types includes object, then return the IDL value + that is a reference to the object V.
    4. +
    +
  6. +
  7. + If V is an object, then: +
      + +
    1. If types includes object, then return the IDL value + that is a reference to the object V.
    2. +
    +
  8. +
  9. + If V is a DOMException platform object, then: +
      +
    1. If types includes DOMException or + Error, then return the + result of converting + V to that type.
    2. +
    3. If types includes object, then return the IDL value + that is a reference to the object V.
    4. +
    +
  10. +
  11. + If V is a native Error object (that is, it has an [[ErrorData]] internal slot), then: +
      +
    1. If types includes Error, then return the + result of converting + V to Error.
    2. +
    3. If types includes object, then return the IDL value + that is a reference to the object V.
    4. +
    +
  12. +
  13. + If V is an object with an [[ArrayBufferData]] internal slot, then: +
      +
    1. If types includes ArrayBuffer, then return the + result of converting + V to ArrayBuffer.
    2. +
    3. If types includes object, then return the IDL value + that is a reference to the object V.
    4. +
    +
  14. +
  15. + If V is an object with a [[DataView]] internal slot, then: +
      +
    1. If types includes DataView, then return the + result of converting + V to DataView.
    2. +
    3. If types includes object, then return the IDL value + that is a reference to the object V.
    4. +
    +
  16. +
  17. + If V is an object with a [[TypedArrayName]] internal slot, then: +
      +
    1. If types includes a typed array type + whose name is the value of V’s [[TypedArrayName]] internal slot, then return the + result of converting + V to that type.
    2. +
    3. If types includes object, then return the IDL value + that is a reference to the object V.
    4. +
    +
  18. +
  19. + If IsCallable(V) is true, then: +
      +
    1. If types includes a callback function + type, then return the result of + converting + V to that callback function type.
    2. +
    3. If types includes object, then return the IDL value + that is a reference to the object V.
    4. +
    +
  20. +
  21. If V is null, undefined, or an object, then: +
      +
    1. If types includes a dictionary type, then return the + result of converting + V to that dictionary type.
    2. +
    +
  22. +
  23. + If V is an object, then: +
      +
    1. + If types includes a sequence type, then +
        +
      1. + Let method be the result of + GetMethod(V, @@iterator). +
      2. +
      3. + ReturnIfAbrupt(method). +
      4. +
      5. + If method is not + undefined, + return the result of + creating a + sequence of that type from V and + method. +
      6. +
      +
    2. + +
    3. If types includes a callback interface + type, then return the result of + converting + V to that interface type.
    4. +
    5. If types includes object, then return the IDL value + that is a reference to the object V.
    6. +
    +
  24. +
  25. + If V is a Boolean value, then: +
      +
    1. + If types includes a boolean, + then return the result of converting + V to boolean. +
    2. +
    +
  26. +
  27. + If V is a Number value, then: +
      +
    1. + If types includes a numeric type, + then return the result of converting + V to that numeric type. +
    2. +
    +
  28. +
  29. + If types includes a string type, + then return the result of + converting + V to that type. +
  30. +
  31. + If types includes a numeric type, + then return the result of converting + V to that numeric type. +
  32. +
  33. + If types includes a boolean, + then return the result of converting + V to boolean. +
  34. +
  35. + Throw a TypeError. +
  36. +
+

+ An IDL union type value is + converted to an ECMAScript value + as follows. If the value is an object + reference to a special object that represents an ECMAScript undefined + value, then it is converted to the ECMAScript + undefined value. Otherwise, + the rules for converting the specific type + of the IDL union type value are as described in this section (4.2). +

+
+ + + +
+

4.2.28 Error

+ +

+ IDL Error values are represented + by native ECMAScript Error objects and + platform objects for DOMExceptions. +

+

+ An ECMAScript value V is + converted + to an IDL Error value + by running the following algorithm: +

+
    +
  1. If Type(V) is not Object, + or V does not have an [[ErrorData]] internal slot, then throw a TypeError.
  2. +
  3. Return the IDL Error value that is a reference to the same object as V.
  4. +
+

+ The result of converting + an IDL Error value to an ECMAScript + value is the Error value that represents a reference to the same object that the + IDL Error represents. +

+
+ +
+

4.2.29 DOMException

+ +

+ IDL DOMException values are represented + by platform objects for DOMExceptions. +

+

+ An ECMAScript value V is + converted + to an IDL DOMException value + by running the following algorithm: +

+
    +
  1. If Type(V) is not Object, + or V is not a platform object that represents a DOMException, then throw a TypeError.
  2. +
  3. Return the IDL DOMException value that is a reference to the same object as V.
  4. +
+

+ The result of converting + an IDL DOMException value to an ECMAScript + value is the Object value that represents a reference to the same object that the + IDL DOMException represents. +

+
+ +
+

4.2.30 Buffer source types

+ +

+ Values of the IDL buffer source types + are represented by objects of the corresponding ECMAScript class. +

+

+ An ECMAScript value V is + converted + to an IDL ArrayBuffer value + by running the following algorithm: +

+
    +
  1. If Type(V) is not Object, + or V does not have an [[ArrayBufferData]] internal slot, + or IsDetachedBuffer(V) is true, + then throw a TypeError. +
  2. +
  3. Return the IDL ArrayBuffer value that is a reference to the same object as V.
  4. +
+

+ An ECMAScript value V is + converted + to an IDL DataView value + by running the following algorithm: +

+
    +
  1. If Type(V) is not Object, + or V does not have a [[DataView]] internal slot, + then throw a TypeError.
  2. +
  3. Return the IDL DataView value that is a reference to the same object as V.
  4. +
+

+ An ECMAScript value V is + converted + to an IDL Int8Array, + Int16Array, + Int32Array, + Uint8Array, + Uint16Array, + Uint32Array, + Uint8ClampedArray, + Float32Array or + Float64Array value + by running the following algorithm: +

+
    +
  1. Let T be the IDL type V is being converted to.
  2. +
  3. If Type(V) is not Object, + or V does not have a [[TypedArrayName]] internal slot + with a value equal to the name of T, + then throw a TypeError.
  4. +
  5. Return the IDL value of type T that is a reference to the same object as V.
  6. +
+

+ The result of converting + an IDL value of any buffer source type + to an ECMAScript value is the Object value that represents + a reference to the same object that the IDL value represents. +

+

+ When getting a reference to + or getting a copy of the bytes held by a buffer source + that is an ECMAScript ArrayBuffer, DataView + or typed array object, these steps MUST be followed: +

+
    +
  1. Let O be the ECMAScript object that is the buffer source.
  2. +
  3. Initialize arrayBuffer to O.
  4. +
  5. Initialize offset to 0.
  6. +
  7. Initialize length to 0.
  8. +
  9. If O has a [[ViewedArrayBuffer]] internal slot, then: +
      +
    1. Set arrayBuffer to the value of O’s [[ViewedArrayBuffer]] internal slot.
    2. +
    3. If arrayBuffer is undefined, then + throw a TypeError.
    4. +
    5. Set offset to the value of O’s [[ByteOffset]] internal slot.
    6. +
    7. Set length to the value of O’s [[ByteLength]] internal slot.
    8. +
    +
  10. +
  11. Otherwise, set length to the value of O’s [[ArrayBufferByteLength]] internal slot.
  12. +
  13. If IsDetachedBuffer(arrayBuffer) is true, then + throw a TypeError.
  14. +
  15. Let data be the value of arrayBuffer’s [[ArrayBufferData]] internal slot.
  16. +
  17. Return a reference to or copy of (as required) the length bytes in data + starting at byte offset offset.
  18. +
+

+ To detach an ArrayBuffer, + these steps MUST be followed: +

+
    +
  1. Let O be the ECMAScript object that is the ArrayBuffer.
  2. +
  3. DetachArrayBuffer(O).
  4. +
+
+ + +
+ +
+

4.3 ECMAScript-specific extended attributes

+ +

+ This section defines a number of + extended attributes + whose presence affects only the ECMAScript binding. +

+ +
+

4.3.1 [Clamp]

+ +

+ If the [Clamp] + extended attribute + appears on an operation argument, + writable attribute or + dictionary member + whose type is one of the integer types, + it indicates that when an ECMAScript Number is + converted to the IDL type, out of range values will be clamped to the range + of valid values, rather than using the operators that use a modulo operation + (ToInt32, ToUint32, etc.). +

+

+ The [Clamp] + extended attribute MUST + take no arguments. +

+

+ The [Clamp] extended attribute + MUST NOT appear on a read only + attribute, or an attribute, operation argument or dictionary member + that is not of an integer type. It also MUST NOT + be used in conjunction with the [EnforceRange] + extended attribute. +

+

+ See the rules for converting ECMAScript values to the various IDL integer + types in section 4.2 + for the specific requirements that the use of + [Clamp] entails. +

+ +
Example
+

+ In the following IDL fragment, + two operations are declared that + take three octet arguments; one uses + the [Clamp] extended attribute + on all three arguments, while the other does not: +

+
IDL
interface GraphicsContext {
+  void setColor(octet red, octet green, octet blue);
+  void setColorClamped([Clamp] octet red, [Clamp] octet green, [Clamp] octet blue);
+};
+

+ In an ECMAScript implementation of the IDL, a call to setColorClamped with + Number values that are out of range for an + octet are clamped to the range [0, 255]. +

+
ECMAScript
// Get an instance of GraphicsContext.
+var context = getGraphicsContext();
+
+// Calling the non-[Clamp] version uses ToUint8 to coerce the Numbers to octets.
+// This is equivalent to calling setColor(255, 255, 1).
+context.setColor(-1, 255, 257);
+
+// Call setColorClamped with some out of range values.
+// This is equivalent to calling setColorClamped(0, 255, 255).
+context.setColorClamped(-1, 255, 257);
+
+
+ +
+

4.3.2 [Constructor]

+ +

+ If the [Constructor] + extended attribute + appears on an interface, it indicates that + the interface object for this interface + will have a [[Construct]] internal method, + allowing objects implementing the interface to be constructed. +

+

+ If it appears on a dictionary, then it + indicates that the ECMAScript global object will have a property whose + name is the identifier of the dictionary and whose value is a constructor + function that can return an ECMAScript object that represents a dictionary + value of the given type. +

+

+ Multiple [Constructor] extended + attributes may appear on a given interface or dictionary. +

+

+ The [Constructor] + extended attribute MUST either + take no arguments or + take an argument list. + The bare form, [Constructor], has the same meaning as + using an empty argument list, [Constructor()]. For each + [Constructor] extended attribute + on the interface, there will be a way to construct an object that implements + the interface by passing the specified arguments. +

+

+ The prose definition of a constructor MUST + either return an IDL value of a type corresponding to the interface + or dictionary the [Constructor] + extended attribute appears on, or throw an exception. +

+

+ If the [Constructor] extended attribute + is specified on an interface, then the [NoInterfaceObject] + extended attribute MUST NOT also be specified on that interface. +

+

+ The [Constructor] extended attribute + MUST NOT be used on a callback interface. +

+

+ See section 4.5.1.1 + for details on how a constructor + for an interface is to be implemented, and + section 4.5.3 + for how a constructor for a dictionary is to be implemented. +

+ +
Example
+

+ The following IDL defines two interfaces. The second has the + [Constructor] extended + attribute, while the first does not. +

+
IDL
interface NodeList {
+  Node item(unsigned long index);
+  readonly attribute unsigned long length;
+};
+
+[Constructor,
+ Constructor(double radius)]
+interface Circle {
+  attribute double r;
+  attribute double cx;
+  attribute double cy;
+  readonly attribute double circumference;
+};
+

+ An ECMAScript implementation supporting these interfaces would + have a [[Construct]] internal method on the + Circle interface object which would + return a new object that implements the interface. It would take + either zero or one argument. The + NodeList interface object would not + have a [[Construct]] internal method. +

+
ECMAScript
var x = new Circle();      // This uses the zero-argument constructor to create a
+                           // reference to a platform object that implements the
+                           // Circle interface.
+
+var y = new Circle(1.25);  // This also creates a Circle object, this time using
+                           // the one-argument constructor.
+
+var z = new NodeList();    // This would throw a TypeError, since no
+                           // [Constructor] is declared.
+
+ +
Example
+

+ The following IDL defines a dictionary type with a constructor: +

+
IDL
[Constructor(unsigned long patties, unsigned long cheeseSlices)]
+dictionary BurgerOrder {
+  unsigned long pattyCount;
+  unsigned long cheeseSliceCount;
+};
+

+ The constructor is defined with the following prose: +

+
+

When the BurgerOrder constructor + is invoked, it must return a dictionary value of type + BurgerOrder whose + pattyCount and cheeseSliceCount members are set to the + values of the patties and cheeseSlices arguments, respectively.

+
+

+ An ECMAScript implementation supporting this dictionary type + would have a constructor function on the global object that + returns a plain object with properties corresponding to + the dictionary’s members: +

+
ECMAScript
typeof BurgerOrder;                                // Evaluates to "function".
+
+var order = new BurgerOrder(1, 2);                 // Creates a new object.
+
+order.hasOwnProperty("pattyCount");                // Evaluates to true.
+order.pattyCount;                                  // Evaluates to 1.
+
+Object.getPrototypeOf(order) == Object.prototype;  // Evaluates to true.
+
+
+ +
+

4.3.3 [EnforceRange]

+ +

+ If the [EnforceRange] + extended attribute + appears on an operation argument, + writable regular attribute or + dictionary member + whose type is one of the integer types, + it indicates that when an ECMAScript Number is + converted to the IDL type, out of range values will cause an exception to + be thrown, rather than being converted to a valid value using the operators that use a modulo operation + (ToInt32, ToUint32, etc.). The Number + will be rounded towards zero before being checked against its range. +

+

+ The [EnforceRange] + extended attribute MUST + take no arguments. +

+

+ The [EnforceRange] extended attribute + MUST NOT appear on a read only + attribute, a static attribute, + or an attribute, operation argument or dictionary member + that is not of an integer type. It also MUST NOT + be used in conjunction with the [Clamp] + extended attribute. +

+

+ See the rules for converting ECMAScript values to the various IDL integer + types in section 4.2 + for the specific requirements that the use of + [EnforceRange] entails. +

+ +
Example
+

+ In the following IDL fragment, + two operations are declared that + take three octet arguments; one uses + the [EnforceRange] extended attribute + on all three arguments, while the other does not: +

+
IDL
interface GraphicsContext {
+  void setColor(octet red, octet green, octet blue);
+  void setColorEnforcedRange([EnforceRange] octet red, [EnforceRange] octet green, [EnforceRange] octet blue);
+};
+

+ In an ECMAScript implementation of the IDL, a call to setColorEnforcedRange with + Number values that are out of range for an + octet will result in an exception being + thrown. +

+
ECMAScript
// Get an instance of GraphicsContext.
+var context = getGraphicsContext();
+
+// Calling the non-[EnforceRange] version uses ToUint8 to coerce the Numbers to octets.
+// This is equivalent to calling setColor(255, 255, 1).
+context.setColor(-1, 255, 257);
+
+// When setColorEnforcedRange is called, Numbers are rounded towards zero.
+// This is equivalent to calling setColor(0, 255, 255).
+context.setColorEnforcedRange(-0.9, 255, 255.2);
+
+// The following will cause a TypeError to be thrown, since even after
+// rounding the first and third argument values are out of range.
+context.setColorEnforcedRange(-1, 255, 256);
+
+
+ +
+

4.3.4 [Exposed]

+

+ If the [Exposed] + extended attribute + appears on an interface, + partial interface, + an individual interface member, or + dictionary with a constructor, + it indicates that the interface, interface member or dictionary constructor is exposed + on a particular set of global interfaces, rather than the default of + being exposed only on the primary global interface. +

+

+ The [Exposed] + extended attribute + MUST either + take an identifier or + take an identifier list. + Each of the identifiers mentioned MUST be + a global name. +

+

+ Every construct that the [Exposed] + extended attribute + can be specified on has an exposure set, + which is a set of interfaces + defining which global environments the construct can be used in. + The exposure set + for a given construct is defined as follows: +

+ +

+ If [Exposed] appears on an + overloaded operation, + then it MUST appear identically on all overloads. +

+

+ The [Exposed] extended attribute + MUST NOT be specified on both an interface + member and a partial interface definition the interface member is declared on. +

+

+ If [Exposed] appears on both an interface + and one of its interface members, then the interface member's + exposure set + MUST be a subset of the interface's + exposure set. +

+

+ An interface's exposure set + MUST be a subset of the + exposure set of all + of the interface's consequential + interfaces. +

+

+ If an interface X + inherits from another interface + Y then the + exposure set of + X MUST be a subset of the + exposure set of + Y. +

+

+ The [Exposed] extended attribute + MUST NOT be specified on a dictionary + that does not also have a [Constructor] extended attribute. +

+

+ An interface, + interface member or + dictionary + is exposed in a given ECMAScript global environment if + the ECMAScript global object implements an interface that is in the + interface, interface member or dictionary's + exposure set. +

+

+ See + section 4.5 , + section 4.5.3 , + section 4.5.6 , + section 4.5.7 , + section 4.5.8 and + section 4.5.9 + for the specific requirements that the use of + [Exposed] entails. +

+
Example
+

[Exposed] + is intended to be used to control whether interfaces or individual interface + members are available for use only in workers, only in the Window, + or in both.

+

The following IDL fragment shows how that might be achieved:

+
IDL
[PrimaryGlobal]
+interface Window {
+  ...
+};
+
+// By using the same identifier Worker for both SharedWorkerGlobalScope
+// and DedicatedWorkerGlobalScope, both can be addressed in an [Exposed]
+// extended attribute at once.
+[Global=Worker]
+interface SharedWorkerGlobalScope : WorkerGlobalScope {
+  ...
+};
+
+[Global=Worker]
+interface DedicatedWorkerGlobalScope : WorkerGlobalScope {
+  ...
+};
+
+// MathUtils is available for use in workers and on the main thread.
+[Exposed=(Window,Worker)]
+interface MathUtils {
+  static double someComplicatedFunction(double x, double y);
+};
+
+// WorkerUtils is only available in workers.  Evaluating WorkerUtils
+// in the global scope of a worker would give you its interface object, while
+// doing so on the main thread will give you a ReferenceError.
+[Exposed=Worker]
+interface WorkerUtils {
+  static void setPriority(double x);
+};
+
+// Node is only available on the main thread.  Evaluating Node
+// in the global scope of a worker would give you a ReferenceError.
+interface Node {
+  ...
+};
+
+
+ + + +
+

4.3.5 [Global] and [PrimaryGlobal]

+ +

+ If the [Global] + or [PrimaryGlobal] + extended attribute + appears on an interface, + it indicates that objects implementing this interface can + be used as the global object in an ECMAScript environment, + and that the structure of the prototype chain and how + properties corresponding to interface members + will be reflected on the prototype objects will be different from other + interfaces. Specifically: +

+
    +
  1. Any named properties + will be exposed on an object in the prototype chain – the + named properties object – + rather than on the object itself.
  2. +
  3. Interface members from the + interface (or + consequential interfaces) + will correspond to properties on the object itself rather than on + interface prototype objects.
  4. +
+
Note
+

+ Placing named properties on an object in the prototype chain + is done so that variable declarations and bareword assignments + will shadow the named property with a property on the global + object itself. +

+

+ Placing properties corresponding to interface members on + the object itself will mean that common feature detection + methods like the following will work: +

+
ECMAScript
var indexedDB = window.indexedDB || window.webkitIndexedDB ||
+                window.mozIndexedDB || window.msIndexedDB;
+
+var requestAnimationFrame = window.requestAnimationFrame ||
+                            window.mozRequestAnimationFrame || ...;
+

+ If these properties were instead placed on a prototype object, then because of the way variable declarations are handled in + ECMAScript, the code above would result in window.indexedDB + and window.requestAnimationFrame evaluating + to undefined, as the shadowing variable + property would already have been created before the + assignment is evaluated. +

+
+

+ If the [Global] or + [PrimaryGlobal] + extended attribute + is used on an interface, then: +

+ +

+ If [Global] or [PrimaryGlobal] is specified on + a partial interface + definition, then that partial interface definition MUST + be the part of the interface definition that defines + the named property getter. +

+

+ The [Global] and [PrimaryGlobal] + extended attributes MUST NOT + be used on an interface that can have more + than one object implementing it in the same ECMAScript global environment. +

+
Note
+

This is because the named properties object, + which exposes the named properties, is in the prototype chain, and it would not make + sense for more than one object’s named properties to be exposed on an object that + all of those objects inherit from.

+
+

+ If an interface is declared with the [Global] or + [PrimaryGlobal] + extended attribute, then + there MUST NOT be more than one + interface member across + the interface and its consequential interfaces + with the same identifier. + There also MUST NOT be more than + one stringifier, + or more than one serializer + across those interfaces. +

+
Note
+

This is because all of the members of the interface and its consequential + interfaces get flattened down on to the object that implements the interface.

+
+

+ The [Global] and + [PrimaryGlobal] extended attributes + can also be used to give a name to one or more global interfaces, + which can then be referenced by the [Exposed] + extended attribute. +

+

+ The [Global] and + [PrimaryGlobal] + extended attributes MUST either + take no arguments + or take an identifier list. +

+

+ If the [Global] or + [PrimaryGlobal] + extended attribute + is declared with an identifier list argument, then those identifiers are the interface’s + global names; otherwise, the interface has + a single global name, which is the interface's identifier. +

+
Note
+

The identifier argument list exists so that more than one global interface can + be addressed with a single name in an [Exposed] + extended attribute.

+
+

+ The [Global] and + [PrimaryGlobal] + extended attributes + MUST NOT be declared on the same + interface. The [PrimaryGlobal] + extended attribute MUST be declared on + at most one interface. The interface [PrimaryGlobal] + is declared on, if any, is known as the primary global interface. +

+

+ See section 4.5.5 , + section 4.7.3 and + section 4.7.7 + for the specific requirements that the use of + [Global] and [PrimaryGlobal] + entails for named properties, + and section 4.5.6 , + section 4.5.7 and + section 4.5.8 + for the requirements relating to the location of properties + corresponding to interface members. +

+
Example
+

+ The [PrimaryGlobal] + extended attribute is intended + to be used by the Window interface as defined in + HTML5 ([HTML5], section 5.2). ([Global] + is intended to be used by worker global interfaces.) + The Window interface exposes frames as properties on the Window + object. Since the Window object also serves as the + ECMAScript global object, variable declarations or assignments to the named properties + will result in them being replaced by the new value. Variable declarations for + attributes will not create a property that replaces the existing one. +

+
IDL
[PrimaryGlobal]
+interface Window {
+  getter any (DOMString name);
+  attribute DOMString name;
+  // ...
+};
+

+ The following HTML document illustrates how the named properties on the + Window object can be shadowed, and how + the property for an attribute will not be replaced when declaring + a variable of the same name: +

+
HTML
<!DOCTYPE html>
+<title>Variable declarations and assignments on Window</title>
+<iframe name=abc></iframe>
+<!-- Shadowing named properties -->
+<script>
+  window.abc;    // Evaluates to the iframe's Window object.
+  abc = 1;       // Shadows the named property.
+  window.abc;    // Evaluates to 1.
+</script>
+
+<!-- Preserving properties for IDL attributes -->
+<script>
+  Window.prototype.def = 2;         // Places a property on the prototype.
+  window.hasOwnProperty("length");  // Evaluates to true.
+  length;                           // Evaluates to 1.
+  def;                              // Evaluates to 2.
+</script>
+<script>
+  var length;                       // Variable declaration leaves existing property.
+  length;                           // Evaluates to 1.
+  var def;                          // Variable declaration creates shadowing property.
+  def;                              // Evaluates to undefined.
+</script>
+
+
+ + +
+

4.3.6 [LenientThis]

+

+ If the [LenientThis] + extended attribute + appears on a regular attribute, + it indicates that invocations of the attribute’s getter or setter + with a this value that is not an + object that implements the interface + on which the attribute appears will be ignored. +

+

+ The [LenientThis] extended attribute + MUST + take no arguments. + It MUST NOT be used on a + static attribute. +

+
Warning
+

+ Specifications SHOULD NOT use [LenientThis] + unless required for compatibility reasons. Specification authors who + wish to use this feature are strongly advised to discuss this on the + public-script-coord@w3.org + mailing list before proceeding. +

+
+

+ See the Attributes section for how + [LenientThis] + is to be implemented. +

+
Example
+

+ The following IDL fragment defines an interface that uses the + [LenientThis] extended + attribute. +

+
IDL
interface Example {
+  [LenientThis] attribute DOMString x;
+  attribute DOMString y;
+};
+

+ An ECMAScript implementation that supports this interface will + allow the getter and setter of the accessor property that corresponds + to x to be invoked with something other than an Example + object. +

+
ECMAScript
var example = getExample();  // Get an instance of Example.
+var obj = { };
+
+// Fine.
+example.x;
+
+// Ignored, since the this value is not an Example object and [LenientThis] is used.
+Object.getOwnPropertyDescriptor(Example.prototype, "x").get.call(obj);
+
+// Also ignored, since Example.prototype is not an Example object and [LenientThis] is used.
+Example.prototype.x;
+
+// Throws a TypeError, since Example.prototype is not an Example object.
+Example.prototype.y;
+
+
+ +
+

4.3.7 [NamedConstructor]

+

+ If the [NamedConstructor] + extended attribute + appears on an interface, + it indicates that the ECMAScript global object will have a property with the + specified name whose value is a constructor function that can + create objects that implement the interface. + Multiple [NamedConstructor] extended + attributes may appear on a given interface. +

+

+ The [NamedConstructor] + extended attribute MUST either + take an identifier or + take a named argument list. + The first form, [NamedConstructor=identifier], has the same meaning as + using an empty argument list, [NamedConstructor=identifier()]. For each + [NamedConstructor] extended attribute + on the interface, there will be a way to construct an object that implements + the interface by passing the specified arguments to the constructor function + that is the value of the aforementioned property. +

+

+ The identifier used for the named constructor MUST NOT + be the same as that used by a [NamedConstructor] + extended attribute on another interface, MUST NOT + be the same as an identifier of an interface + that has an interface object, + and MUST NOT be one of the + reserved identifiers. +

+

+ The [NamedConstructor] extended attribute + MUST NOT be used on a callback interface. +

+

+ See section 4.5.2 + for details on how named constructors + are to be implemented. +

+ +
Example
+

+ The following IDL defines an interface that uses the + [NamedConstructor] extended + attribute. +

+
IDL
[NamedConstructor=Audio,
+ NamedConstructor=Audio(DOMString src)]
+interface HTMLAudioElement : HTMLMediaElement {
+  // ...
+};
+

+ An ECMAScript implementation that supports this interface will + allow the construction of HTMLAudioElement + objects using the Audio constructor. +

+
ECMAScript
typeof Audio;                   // Evaluates to 'function'.
+
+var a1 = new Audio();           // Creates a new object that implements
+                                // HTMLAudioElement, using the zero-argument
+                                // constructor.
+
+var a2 = new Audio('a.flac');   // Creates an HTMLAudioElement using the
+                                // one-argument constructor.
+
+
+ +
+

4.3.8 [NewObject]

+ +

+ If the [NewObject] + extended attribute + appears on a regular + or static + operation, + then it indicates that when calling the operation, + a reference to a newly created object + MUST always be returned. +

+

+ The [NewObject] + extended attribute MUST + take no arguments. +

+

+ The [NewObject] + extended attribute MUST NOT + be used on anything other than a regular + or static + operation + whose return type + is an interface type or + a promise type. +

+
Example
+

+ As an example, this extended attribute is suitable for use on + the createElement + operation on the Document + interface ([DOM], section 6.5), + since a new object should always be returned when + it is called. +

+
IDL
interface Document : Node {
+  [NewObject] Element createElement(DOMString localName);
+  ...
+};
+
+
+ +
+

4.3.9 [NoInterfaceObject]

+ +

+ If the [NoInterfaceObject] + extended attribute + appears on an interface, + it indicates that an + interface object + will not exist for the interface in the ECMAScript binding. +

+
Warning
+

+ The [NoInterfaceObject] extended attribute + SHOULD NOT be used on interfaces that are not + solely used as supplemental interfaces, + unless there are clear Web compatibility reasons for doing so. Specification authors who + wish to use this feature are strongly advised to discuss this on the + public-script-coord@w3.org + mailing list before proceeding. +

+
+

+ The [NoInterfaceObject] extended attribute + MUST take no arguments. +

+

+ If the [NoInterfaceObject] extended attribute + is specified on an interface, then the [Constructor] + extended attribute MUST NOT also be specified on that interface. + A [NamedConstructor] extended attribute is fine, + however. +

+

+ The [NoInterfaceObject] extended attribute + MUST NOT be specified on an interface that has any + static operations defined on it. +

+

+ The [NoInterfaceObject] extended attribute + MUST NOT be specified on a callback interface + unless it has a constant declared on it. + This is because callback interfaces without constants never have + interface objects. +

+

+ An interface that does not have the [NoInterfaceObject] extended + attribute specified MUST NOT inherit + from an interface that has the [NoInterfaceObject] extended + attribute specified. +

+

+ See section 4.5 + for the specific requirements that the use of + [NoInterfaceObject] entails. +

+
Example
+

+ The following IDL + fragment defines two interfaces, one whose interface object + is exposed on the ECMAScript global object, and one whose isn’t: +

+
IDL
interface Storage {
+  void addEntry(unsigned long key, any value);
+};
+
+[NoInterfaceObject]
+interface Query {
+  any lookupEntry(unsigned long key);
+};
+

+ An ECMAScript implementation of the above IDL would allow + manipulation of Storage’s + prototype, but not Query’s. +

+
ECMAScript
typeof Storage;                        // evaluates to "function"
+
+// Add some tracing alert() call to Storage.addEntry.
+var fn = Storage.prototype.addEntry;
+Storage.prototype.addEntry = function(key, value) {
+  alert('Calling addEntry()');
+  return fn.call(this, key, value);
+};
+
+typeof Query;                          // evaluates to "undefined"
+var fn = Query.prototype.lookupEntry;  // exception, Query isn’t defined
+
+
+
+ +
+

4.3.10 [OverrideBuiltins]

+ +

+ If the [OverrideBuiltins] + extended attribute + appears on an interface, + it indicates that for a platform object implementing the interface, + properties corresponding to all of + the object’s supported property names + will appear to be on the object, + regardless of what other properties exist on the object or its + prototype chain. This means that named properties will always shadow + any properties that would otherwise appear on the object. + This is in contrast to the usual behavior, which is for named properties + to be exposed only if there is no property with the + same name on the object itself or somewhere on its prototype chain. +

+

+ The [OverrideBuiltins] + extended attribute MUST + take no arguments + and MUST NOT appear on an interface + that does not define a named property getter + or that also is declared with the [Global] + or [PrimaryGlobal] + extended attribute. If the extended attribute is specified on + a partial interface + definition, then that partial interface definition MUST + be the part of the interface definition that defines + the named property getter. +

+

+ See section 4.7.1 + and section 4.7.7 + for the specific requirements that the use of + [OverrideBuiltins] entails. +

+
Example
+

+ The following IDL fragment + defines two interfaces, + one that has a named property getter + and one that does not. +

+
IDL
interface StringMap {
+  readonly attribute unsigned long length;
+  getter DOMString lookup(DOMString key);
+};
+
+[OverrideBuiltins]
+interface StringMap2 {
+  readonly attribute unsigned long length;
+  getter DOMString lookup(DOMString key);
+};
+

+ In an ECMAScript implementation of these two interfaces, + getting certain properties on objects implementing + the interfaces will result in different values: +

+
ECMAScript
// Obtain an instance of StringMap.  Assume that it has "abc", "length" and
+// "toString" as supported property names.
+var map1 = getStringMap();
+
+// This invokes the named property getter.
+map1.abc;
+
+// This fetches the "length" property on the object that corresponds to the
+// length attribute.
+map1.length;
+
+// This fetches the "toString" property from the object's prototype chain.
+map1.toString;
+
+
+// Obtain an instance of StringMap2.  Assume that it also has "abc", "length"
+// and "toString" as supported property names.
+var map2 = getStringMap2();
+
+// This invokes the named property getter.
+map2.abc;
+
+// This also invokes the named property getter, despite the fact that the "length"
+// property on the object corresponds to the length attribute.
+map2.length;
+
+// This too invokes the named property getter, despite the fact that "toString" is
+// a property in map2's prototype chain.
+map2.toString;
+
+
+ + + +
+

4.3.11 [PutForwards]

+ +

+ If the [PutForwards] + extended attribute + appears on a read only + regular attribute declaration whose type is + an interface type, + it indicates that assigning to the attribute will have specific behavior. + Namely, the assignment is “forwarded” to the attribute (specified by + the extended attribute argument) on the object that is currently + referenced by the attribute being assigned to. +

+

+ The [PutForwards] extended + attribute MUST take an identifier. + Assuming that: +

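+ A is the attribute on which the [PutForwards] extended attribute appears, I is the interface on which A is declared, J is the interface type that A is declared to be of, and N is the identifier argument of the extended attribute, +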

+ then there MUST be another + attribute B + declared on J whose identifier + is N. Assignment of a value to the attribute A + on an object implementing I will result in that value + being assigned to attribute B of the object that A + references, instead. +

+

+ Note that [PutForwards]-annotated + attributes can be + chained. That is, an attribute with the [PutForwards] + extended attribute + can refer to an attribute that itself has that extended attribute. + There MUST NOT exist a cycle in a + chain of forwarded assignments. A cycle exists if, when following + the chain of forwarded assignments, a particular attribute on + an interface is + encountered more than once. +

+

+ An attribute with the [PutForwards] + extended attribute MUST NOT also be declared + with the [Replaceable] + extended attribute. +

+

+ The [PutForwards] + extended attribute MUST NOT be used + on an attribute that + is not read only. +

+

+ The [PutForwards] extended attribute + MUST NOT be used on a + static attribute. +

+

+ The [PutForwards] extended attribute + MUST NOT be used on an attribute declared on + a callback interface. +

+

+ See the Attributes section for how + [PutForwards] + is to be implemented. +

+
Example
+

+ The following IDL fragment defines interfaces for names and people. + The [PutForwards] extended + attribute is used on the name attribute + of the Person interface to indicate + that assignments to that attribute result in assignments to the + full attribute of the + Person object: +

+
IDL
interface Name {
+  attribute DOMString full;
+  attribute DOMString family;
+  attribute DOMString given;
+};
+
+interface Person {
+  [PutForwards=full] readonly attribute Name name;
+  attribute unsigned short age;
+};
+

+ In the ECMAScript binding, this would allow assignments to the + “name” property: +

+
ECMAScript
var p = getPerson();           // Obtain an instance of Person.
+
+p.name = 'John Citizen';       // This statement...
+p.name.full = 'John Citizen';  // ...has the same behavior as this one.
+
+
+ +
+

4.3.12 [Replaceable]

+ +

+ If the [Replaceable] + extended attribute + appears on a read only + regular attribute, + it indicates that setting the corresponding property on the + platform object will result in + an own property with the same name being created on the object + which has the value being assigned. This property will shadow + the accessor property corresponding to the attribute, which + exists on the interface prototype object. +

+

+ The [Replaceable] + extended attribute MUST + take no arguments. +

+

+ An attribute with the [Replaceable] + extended attribute MUST NOT also be declared + with the [PutForwards] + extended attribute. +

+

+ The [Replaceable] + extended attribute MUST NOT be used + on an attribute that + is not read only. +

+

+ The [Replaceable] extended attribute + MUST NOT be used on a + static attribute. +

+

+ The [Replaceable] extended attribute + MUST NOT be used on an attribute declared on + a callback interface. +

+

+ See section 4.5.7 + for the specific requirements that the use of + [Replaceable] entails. +

+
Example
+

+ The following IDL fragment + defines an interface + with an operation + that increments a counter, and an attribute + that exposes the counter’s value, which is initially 0: +

+
IDL
interface Counter {
+  [Replaceable] readonly attribute unsigned long value;
+  void increment();
+};
+

+ Assigning to the “value” property + on a platform object implementing Counter + will shadow the property that corresponds to the + attribute: +

+
ECMAScript
var counter = getCounter();                              // Obtain an instance of Counter.
+counter.value;                                           // Evaluates to 0.
+
+counter.hasOwnProperty("value");                         // Evaluates to false.
+Object.getPrototypeOf(counter).hasOwnProperty("value");  // Evaluates to true.
+
+counter.increment();
+counter.increment();
+counter.value;                                           // Evaluates to 2.
+
+counter.value = 'a';                                     // Shadows the property with one that is unrelated
+                                                         // to Counter::value.
+
+counter.hasOwnProperty("value");                         // Evaluates to true.
+
+counter.increment();
+counter.value;                                           // Evaluates to 'a'.
+
+delete counter.value;                                    // Reveals the original property.
+counter.value;                                           // Evaluates to 3.
+
+
+ +
+

4.3.13 [SameObject]

+ +

+ If the [SameObject] + extended attribute + appears on a read only + attribute, then it + indicates that when getting the value of the attribute on a given + object, the same value MUST always + be returned. +

+

+ The [SameObject] + extended attribute MUST + take no arguments. +

+

+ The [SameObject] + extended attribute MUST NOT + be used on anything other than a read only + attribute + whose type is an interface type + or object. +

+
Example
+

+ As an example, this extended attribute is suitable for use on + the implementation + attribute on the Document + interface ([DOM], section 6.5), + since the same object is always returned for a given + Document object. +

+
IDL
interface Document : Node {
+  [SameObject] readonly attribute DOMImplementation implementation;
+  ...
+};
+
+
+ +
+

4.3.14 [TreatNonObjectAsNull]

+ +

+ If the [TreatNonObjectAsNull] + extended attribute + appears on a callback function, + then it indicates that any value assigned to an attribute + whose type is a nullable + callback function + that is not an object will be converted to + the null value. +

+
Warning
+

+ Specifications SHOULD NOT use [TreatNonObjectAsNull] + unless required to specify the behavior of legacy APIs or for consistency with these + APIs. Specification authors who + wish to use this feature are strongly advised to discuss this on the + public-script-coord@w3.org + mailing list before proceeding. At the time of writing, the only known + valid use of [TreatNonObjectAsNull] + is for the callback functions used as the type + of event handler IDL attributes + ([HTML5], section 6.1.6.1) + such as onclick and onerror. +

+
+

+ See section 4.2.24 + for the specific requirements that the use of + [TreatNonObjectAsNull] entails. +

+
Example
+

+ The following IDL fragment defines an interface that has one + attribute whose type is a [TreatNonObjectAsNull]-annotated + callback function and another whose type is a + callback function without the extended attribute: +

+
IDL
callback OccurrenceHandler = void (DOMString details);
+
+[TreatNonObjectAsNull]
+callback ErrorHandler = void (DOMString details);
+
+interface Manager {
+  attribute OccurrenceHandler? handler1;
+  attribute ErrorHandler? handler2;
+};
+

+ In an ECMAScript implementation, assigning a value that is not + an object (such as a Number value) + to handler1 will have different behavior from that when assigning + to handler2: +

+
ECMAScript
var manager = getManager();  // Get an instance of Manager.
+
+manager.handler1 = function() { };
+manager.handler1;            // Evaluates to the function.
+
+try {
+  manager.handler1 = 123;    // Throws a TypeError.
+} catch (e) {
+}
+
+manager.handler2 = function() { };
+manager.handler2;            // Evaluates to the function.
+
+manager.handler2 = 123;
+manager.handler2;            // Evaluates to null.
+
+
+ +
+

4.3.15 [TreatNullAs]

+ +

+ If the [TreatNullAs] + extended attribute + appears on an attribute + or operation argument whose type is + DOMString, + it indicates that a null value + assigned to the attribute or passed as the operation argument will be + handled differently from its default handling. Instead of being stringified + to “null”, which is the default, + it will be converted to the empty string “”. +

+

+ If [TreatNullAs] is specified on + an operation itself, and that operation is on a callback interface, + then it indicates that a user object implementing the interface will have the return + value of the function that implements the operation handled in the same way as for operation arguments + and attributes, as above. +

+

+ The [TreatNullAs] + extended attribute MUST take the identifier + EmptyString. +

+

+ The [TreatNullAs] extended attribute + MUST NOT be specified on an operation argument, + attribute or operation return value whose type is not DOMString. +

+
Note
+

This means that even an attribute of type DOMString? must not + use [TreatNullAs], since null + is a valid value of that type.

+
+

+ The [TreatNullAs] extended attribute + also MUST NOT be specified on an operation on + a non-callback interface. +

+

+ See section 4.2.16 + for the specific requirements that the use of + [TreatNullAs] entails. +

+
Example
+

+ The following IDL fragment defines an interface that has one + attribute with the [TreatNullAs] + extended attribute, and one operation with an argument that has + the extended attribute: +

+
IDL
interface Dog {
+  attribute DOMString name;
+  [TreatNullAs=EmptyString] attribute DOMString owner;
+
+  boolean isMemberOfBreed([TreatNullAs=EmptyString] DOMString breedName);
+};
+

+ An ECMAScript implementation implementing the Dog + interface would convert a null value + assigned to the “owner” property or passed as the + argument to the isMemberOfBreed function + to the empty string rather than "null": +

+
ECMAScript
var d = getDog();         // Assume d is a platform object implementing the Dog
+                          // interface.
+
+d.name = null;            // This assigns the string "null" to the .name
+                          // property.
+
+d.owner = null;           // This assigns the string "" to the .owner property.
+
+d.isMemberOfBreed(null);  // This passes the string "" to the isMemberOfBreed
+                          // function.
+
+
+ +
+

4.3.16 [Unforgeable]

+ +

+ If the [Unforgeable] + extended attribute + appears on a non-static + attribute + or non-static + operation, it indicates + that the attribute or operation will be reflected as an ECMAScript property in + a way that means its behavior cannot be modified and that performing + a property lookup on the object will always result in the attribute’s + property value being returned. In particular, the property will be + non-configurable and will exist as an own property on the object + itself rather than on its prototype. +

+

+ If the [Unforgeable] + extended attribute + appears on an interface, + it indicates that all of the non-static + attributes + and non-static + operations declared on + that interface and its consequential interfaces + will be similarly reflected as own ECMAScript properties on objects + that implement the interface, rather than on the prototype. +

+

+ An attribute or operation is said to be unforgeable + on a given interface A if any of the following are true: +

+ +

+ The [Unforgeable] + extended attribute MUST + take no arguments. +

+

+ The [Unforgeable] + extended attribute MUST NOT appear on + anything other than an attribute, + non-static operation + or an interface. If it does + appear on an operation, then + it MUST appear on all operations with + the same identifier on that interface. +

+

+ If an attribute or operation X is unforgeable + on an interface A, and A is one of the + inherited interfaces + of another interface B, then B and all of its + consequential interfaces + MUST NOT have a non-static attribute or + regular operation with the same + identifier as X. +

+
Note
+

For example, the following is disallowed:

+
IDL
interface A1 {
+  [Unforgeable] readonly attribute DOMString x;
+};
+interface B1 : A1 {
+  void x();  // Invalid; would be shadowed by A1's x.
+};
+
+interface B2 : A1 { };
+B2 implements Mixin;
+interface Mixin {
+  void x();  // Invalid; B2's copy of x would be shadowed by A1's x.
+};
+
+[Unforgeable]
+interface A2 {
+  readonly attribute DOMString x;
+};
+interface B3 : A2 {
+  void x();  // Invalid; would be shadowed by A2's x.
+};
+
+interface B4 : A2 { };
+B4 implements Mixin;
+interface Mixin {
+  void x();  // Invalid; B4's copy of x would be shadowed by A2's x.
+};
+
+interface A3 { };
+A3 implements A2;
+interface B5 : A3 {
+  void x();  // Invalid; would be shadowed by A3's mixed-in copy of A2's x.
+};
+
+

+ See section 4.5.7 , + section 4.5.8 , + section 4.7 , + section 4.7.1 and + section 4.7.7 + for the specific requirements that the use of + [Unforgeable] entails. +

+
Example
+

+ The following IDL fragment defines + an interface that has two attributes, + one of which is designated as [Unforgeable]: +

+
IDL
interface System {
+  [Unforgeable] readonly attribute DOMString username;
+  readonly attribute long long loginTime;
+};
+

+ In an ECMAScript implementation of the interface, the username attribute will be exposed as a non-configurable property on the + object itself: +

+
ECMAScript
var system = getSystem();                      // Get an instance of System.
+
+system.hasOwnProperty("username");             // Evaluates to true.
+system.hasOwnProperty("loginTime");            // Evaluates to false.
+System.prototype.hasOwnProperty("username");   // Evaluates to false.
+System.prototype.hasOwnProperty("loginTime");  // Evaluates to true.
+
+try {
+  // This call would fail, since the property is non-configurable.
+  Object.defineProperty(system, "username", { value: "administrator" });
+} catch (e) { }
+
+// This defineProperty call would succeed, because System.prototype.loginTime
+// is configurable.
+var forgedLoginTime = 5;
+Object.defineProperty(System.prototype, "loginTime", { value: forgedLoginTime });
+
+system.loginTime;  // So this now evaluates to forgedLoginTime.
+
+
+ + +
+ +
+

4.4 Security

+ +

+ Certain algorithms in the sections below are defined to + perform a security check on a given + object. This check is used to determine whether a given + operation invocation or + attribute access should be + allowed. The input to the security check is the + platform object on + which the operation invocation or attribute access is being done, + and the ECMAScript global environment associated with the + Function object that implements the + operation or attribute. +

+
Note
+

The expectation is that the HTML specification defines how a + security check is performed, and that it will either throw an + appropriate exception or return normally. [HTML]

+
+
+ +
+

4.5 Interfaces

+ +

+ For every interface that + is exposed in a given + ECMAScript global environment and: +

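+ is a callback interface that has constants declared on it, or is a non-callback interface that is not declared with the [NoInterfaceObject] extended attribute, +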

+ a corresponding property MUST exist on the + ECMAScript environment's global object. + The name of the property is the identifier of the interface, + and its value is an object called the interface object. +

+

+ The property has the attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true }. + The characteristics of an interface object are described in section 4.5.1 + . +

+ +

+ In addition, for every [NamedConstructor] + extended attribute on an exposed interface, a corresponding property MUST + exist on the ECMAScript global object. The name of the property is the + identifier that occurs directly after the + “=”, and its value is an object called a + named constructor, which allows + construction of objects that implement the interface. The property has the + attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true }. + The characteristics of a named constructor are described in + section 4.5.2 + . +

+ +
+

4.5.1 Interface object

+ +

+ The interface object for a given non-callback interface + is a function object. + It has properties that correspond to + the constants and + static operations + defined on that interface, as described in sections + 4.5.6 Constants and + 4.5.8 Operations + . +

+

+ The [[Prototype]] internal property of + an interface object for a non-callback interface is determined as + follows: +

+
    +
  1. + If the interface inherits from some other interface, the value + of [[Prototype]] is the interface + object for that other interface. +
  2. +
  3. + If the interface doesn't inherit from any other interface, + the value of [[Prototype]] is + %FunctionPrototype% ( + [ECMA-262] + , section 6.1.7.4). +
  4. +
+

+ An interface object for a non-callback interface MUST have a property named “prototype” + with attributes + { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: false } + whose value is an object called the interface prototype object. This object has properties + that correspond to the regular attributes and + regular operations defined on the interface, + and is described in more detail in + section 4.5.4 + . +

+
Note
+

Since an interface object for a non-callback interface is a function object, the typeof operator will return + "function" when applied to + such an interface object.

+
+

+ The internal [[Prototype]] property + of an interface object for a callback interface MUST be + the Object.prototype object. +

+
Note
+

Remember that interface objects for callback interfaces only exist if they have + constants declared on them; + when they do exist, they are not function objects.

+
+ +
+
4.5.1.1 Interface object [[Call]] method
+ +

+ If the interface is declared with a + [Constructor] extended attribute, + then the interface object + can be called as a function to create an object that implements that + interface. Interfaces that do not have a constructor will throw + an exception when called as a function. +

+ + +

+ In order to define how overloaded constructor invocations are resolved, the + overload resolution algorithm + is defined. Its input is an effective overload set, + S, and a list of ECMAScript values, arg0..n−1. + Its output is a pair consisting of the operation or + extended attribute of one of S’s entries + and a list of IDL values or the special value “missing”. The algorithm behaves as follows: +

+
    +
  1. Let maxarg be the length of the longest type list of the entries in S.
  2. +
  3. Initialize argcount to be min(maxarg, n).
  4. + +
  5. Remove from S all entries whose type list is not of length argcount.
  6. + +
  7. If S is empty, then throw a TypeError.
  8. + +
  9. Initialize d to −1.
  10. + +
  11. + Initialize method to + undefined. +
  12. + +
  13. If there is more than one entry in S, then set + d to be the distinguishing argument index + for the entries of S.
  14. + +
  15. Initialize values to be an empty list, where each entry will be either an IDL value or the special value “missing”.
  16. + +
  17. Initialize i to 0.
  18. + +
  19. While i < d: +
      +
    1. Let V be argi.
    2. +
    3. Let type be the type at index i in the type list of any entry in S. +
      Note

      All entries in S at this point have the same type and optionality value at index i.

      +
    4. +
    5. Let optionality be the value at index i in the list of optionality values of any entry in S.
    6. +
    7. If optionality is “optional” and V is undefined, then: +
        +
      1. If the argument at index i is declared with a default value, + then append to values that default value.
      2. +
      3. Otherwise, append to values the special value “missing”.
      4. +
      +
    8. +
    9. Otherwise, append to values the result of converting + V to IDL type type.
    10. +
    11. Set i to i + 1.
    12. +
    +
  20. + +
  21. If i = d, then: +
      +
    1. Let V be argi. +
      Note

      This is the argument that will be used to resolve which overload is selected.

    2. + +
    3. If V is undefined, and there is an entry in S + whose list of optionality values has “optional” at index i, + then remove from S all other entries.
    4. + +
    5. Otherwise: if V is null or undefined, + and there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    6. + +
    7. + Otherwise: if V is a platform object – but not a + platform array object – and + there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    8. + + + + + +
    9. + Otherwise: if V is a DOMException platform object and + there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    10. + +
    11. + Otherwise: if V is an Error object (that is, it has an [[ErrorData]] internal slot) and + there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    12. + +
    13. + Otherwise: if V is an object with an [[ArrayBufferData]] internal slot and + there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    14. + +
    15. + Otherwise: if V is an object with a [[DataView]] internal slot and + there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    16. + +
    17. + Otherwise: if V is an object with a [[TypedArrayName]] internal slot and + there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    18. + +
    19. + Otherwise: if IsCallable(V) is true, + and there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    20. + +
    21. + Otherwise: if V is an object, and + there is an entry in S that has one of the + following types at position i of its type list, + + and after performing the following steps, +
        +
      1. + Let method be the result of + GetMethod(V, @@iterator). +
      2. +
      3. + ReturnIfAbrupt(method). +
      4. +
      + method is not undefined, then remove from S all + other entries. +
    22. + +
    23. + Otherwise: if V is an object, and + there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    24. + +
    25. + Otherwise: if V is a Boolean value, + and there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    26. + +
    27. + Otherwise: if V is a Number value, + and there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    28. + +
    29. + Otherwise: if there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    30. + +
    31. + Otherwise: if there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    32. + +
    33. + Otherwise: if there is an entry in S that has one of the following types at position i of its type list, + + then remove from S all other entries. +
    34. + +
    35. + Otherwise: if there is an entry in S that has any at position i + of its type list, + then remove from S all other entries. +
    36. + +
    37. + Otherwise: + throw a TypeError. +
    38. +
    +
  22. + +
  23. Let callable be the operation or extended attribute + of the single entry in S.
  24. + +
  25. + If i = d and method is not undefined, then +
      +
    1. + Let V be argi. +
    2. +
    3. + Let T be the type at index i in the + type list of the remaining entry in S. +
    4. +
    5. + If T is a sequence type, then + append to values the result of + creating a sequence + of type T from + V and method. +
    6. + +
    7. + Set i to i + 1. +
    8. +
    +
  26. + +
  27. + While i < argcount: +
      +
    1. Let V be argi.
    2. +
    3. Let type be the type at index i in the type list of the remaining entry in S.
    4. +
    5. Let optionality be the value at index i in the list of optionality values of the remaining entry in S.
    6. +
    7. If optionality is “optional” and V is undefined, then: +
        +
      1. If the argument at index i is declared with a default value, + then append to values that default value.
      2. +
      3. Otherwise, append to values the special value “missing”.
      4. +
      +
    8. +
    9. Otherwise, append to values the result of + converting V to IDL type type.
    10. +
    11. Set i to i + 1.
    12. +
    +
  28. + +
  29. While i is less than the number of arguments callable is declared to take: +
      +
    1. If callable’s argument at index i is declared with a default value, + then append to values that default value.
    2. +
    3. Otherwise, if callable’s argument at index i is not variadic, then append to values the special value “missing”.
    4. +
    5. Set i to i + 1.
    6. +
    +
  30. + +
  31. Return the pair <callable, values>.
  32. +
+
Note
+

+ The overload resolution algorithm performs both the identification + of which overloaded operation, constructor, etc. is being called, + and the conversion of the ECMAScript argument values to their + corresponding IDL values. Informally, it operates as follows. +

+

First, the selection of valid overloads is done by considering + the number of ECMAScript arguments that were passed in to the function:

+
    +
  • If there are more arguments passed in than the longest + overload argument list, then they are ignored.
  • +
  • After ignoring these trailing arguments, only overloads + that can take this exact number of arguments are considered. + If there are none, then a TypeError is thrown.
  • +
+

Once we have a set of possible overloads with the right number + of arguments, the ECMAScript values are converted from left to right. + The nature of the restrictions on overloading means that if we + have multiple possible overloads at this point, then there will + be one position in the argument list that will be used to + distinguish which overload we will finally select; this is + the distinguishing + argument index.

+

We first convert the arguments to the left of the distinguishing + argument. (There is a requirement that an argument to the left of + the distinguishing argument index has the same type as in the other + overloads, at the same index.) Then we inspect the type of the + ECMAScript value that is passed in at the distinguishing argument + index to determine which IDL type it may correspond to. + This allows us to select the final overload that will + be invoked. If the value passed in is undefined + and there is an overload with an optional argument at this position, then + we will choose that overload. If there is no valid overload for the type of + value passed in here, then we throw a TypeError. + The inspection of the value at the distinguishing argument index does not have any side effects; + the only side effects that come from running the overload resolution + algorithm are those that come from converting the ECMAScript values + to IDL values.

+

At this point, we have determined which overload to use. We now + convert the remaining arguments, from the distinguishing argument onwards, + again ignoring any additional arguments that were ignored due to being passed + after the last possible argument.

+

When converting an optional argument’s ECMAScript value to its equivalent IDL value, + undefined will be converted into + the optional argument’s default value, + if it has one, or a special value “missing” otherwise.

+

Optional arguments corresponding to a final, variadic argument do not treat + undefined as a special “missing” value, however. + The undefined value is converted to the type + of the variadic argument as would be done for a non-optional argument.

+
+

+ The internal [[Call]] method + of the interface object behaves as follows, assuming + arg0..n−1 is the list + of argument values passed to the constructor, and I + is the interface: +

+
    +
  1. + If I was not declared with a [Constructor] + extended attribute, then + throw a TypeError. +
  2. +
  3. + Let id be the identifier of interface I. +
  4. +
  5. + Initialize S to the + effective overload set + for constructors with identifier + id on interface + I and with argument count n. +
  6. +
  7. + Let <constructor, values> be the result of passing S and + arg0..n−1 to the + overload resolution algorithm. +
  8. +
  9. + Let R be the result of performing the actions listed in the description of + constructor with values as the argument values. +
  10. +
  11. + Return the result of converting + R to an ECMAScript interface type value + I. +
  12. +
+

+ If the internal [[Call]] method + of the interface object + returns normally, then it MUST + return an object that implements interface I. + This object also MUST be + associated with the ECMAScript global environment associated + with the interface object. +

+

+ Interface objects for non-callback interfaces MUST have a property named “length” + with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } + whose value is a Number. + If the [Constructor] + extended attribute + does not appear on the interface definition, then the value is 0. + Otherwise, the value is determined as follows: +

+
    +
  1. + Let id be the identifier of interface I. +
  2. +
  3. + Initialize S to the + effective overload set + for constructors with + identifier + id on interface + I and with argument count 0. +
  4. +
  5. + Return the length of the shortest argument list of the entries in S. +
  6. +
+

+ All interface objects MUST have a + property named “name” with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } + whose value is the identifier of the corresponding interface. +

+
+ +
+
4.5.1.2 Interface object [[HasInstance]] method
+ +

+ The internal [[HasInstance]] method of every + interface object + A MUST behave as follows, + assuming V is the object + argument passed to [[HasInstance]]: +

+
    +
  1. If V is not an object, return false.
  2. +
  3. Let O be the result of calling the [[Get]] method of A with property name “prototype”.
  4. +
  5. If O is not an object, throw a TypeError exception.
  6. +
  7. If V is a platform object that implements the + interface for which O is the interface prototype object, + return true.
  8. +
  9. Repeat: +
      +
    1. Set V to the value of the [[Prototype]] internal property of V.
    2. +
    3. If V is null, return false.
    4. +
    5. If O and V refer to the same object, + return true.
    6. +
    +
  10. +
+
+
+ +
+

4.5.2 Named constructors

+ +

+ A named constructor + that exists due to one or more + [NamedConstructor] + extended attributes + with a given identifier + is a function object. + It MUST have a [[Call]] + internal property, which allows construction of objects that + implement the interface on which the + [NamedConstructor] + extended attributes appear. It behaves as follows, assuming + arg0..n−1 is the list + of argument values passed to the constructor, id + is the identifier of the constructor specified in the + extended attribute named argument list, + and I is the interface + on which the [NamedConstructor] + extended attribute appears: +

+
    +
  1. + Initialize S to the + effective overload set + for constructors with identifier + id on interface + I and with argument count n. +
  2. +
  3. + Let <constructor, values> be the result of passing S and + arg0..n−1 to the + overload resolution algorithm. +
  4. +
  5. + Let R be the result of performing the actions listed in the description of + constructor with values as the argument values. +
  6. +
  7. + Return the result of converting + R to an ECMAScript + interface type value + I. +
  8. +
+

+ If the internal [[Call]] method + of the named constructor + returns normally, then it MUST + return an object that implements interface I. + This object also MUST be + associated with the ECMAScript global environment associated + with the named constructor. +

+

+ A named constructor MUST have a property named “length” + with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } + whose value is a Number determined as follows: +

+
    +
  1. + Initialize S to the + effective overload set + for constructors with + identifier + id on interface + I and with argument count 0. +
  2. +
  3. + Return the length of the shortest argument list of the entries in S. +
  4. +
+

+ A named constructor MUST have a property named “name” + with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } + whose value is the identifier used for the named constructor. +

+

+ A named constructor MUST also have a property named + “prototype” with attributes + { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: false } + whose value is the interface prototype object + for the interface on which the + [NamedConstructor] + extended attribute + appears. +

+
+ +
+

4.5.3 Dictionary constructors

+ +

+ For every dictionary type + that has one or more [Constructor] + extended attributes + and which is exposed in a given + ECMAScript global environment, a corresponding property MUST exist on the + ECMAScript environment's global object. The name of the property is the + identifier of the dictionary, + and its value is a function object + called the dictionary constructor. +

+

+ The property has the attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true }. +

+

+ The internal [[Call]] method of the dictionary + constructor behaves as follows, assuming + arg0..n−1 is the list + of argument values passed to the constructor, and D + is the dictionary type: +

+
    +
  1. + Let id be the identifier of dictionary type D. +
  2. +
  3. + Initialize S to the + effective overload set + for constructors with identifier + id on dictionary type + D and with argument count n. +
  4. +
  5. + Let <constructor, values> be the result of passing S and + arg0..n−1 to the + overload resolution algorithm. +
  6. +
  7. + Let R be the result of performing the actions listed in the description of + constructor with values as the argument values. +
  8. +
  9. + Return the result of converting + R, which is a dictionary value of type D, to an ECMAScript value. +
  10. +
+

+ If the internal [[Call]] method + of the dictionary constructor + returns normally, then it MUST + return an object that is + associated with the ECMAScript global environment associated + with the dictionary constructor. +

+

+ A dictionary constructor object MUST have a property named “length” + with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } + whose value is a Number determined as follows: +

+
    +
  1. + Let id be the identifier of the dictionary type. +
  2. +
  3. + Initialize S to the + effective overload set + for constructors with + identifier + id on dictionary D and with argument count 0. +
  4. +
  5. + Return the length of the shortest argument list of the entries in S. +
  6. +
+

+ A dictionary constructor object MUST have a property named “name” + with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } + whose value is the identifier of the dictionary. +

+
+ +
+

4.5.4 Interface prototype object

+ +

+ There MUST exist an + interface prototype + object for every non-callback interface + defined, regardless of whether the interface was declared with the + [NoInterfaceObject] + extended attribute. + The interface prototype object for a particular interface has + properties that correspond to the regular attributes + and regular operations + defined on that interface. These properties are described in more detail in + sections 4.5.7 Attributes and + 4.5.8 Operations. +

+

+ As with the interface object, + the interface prototype object also has properties that correspond to the + constants defined on that + interface, described in section + 4.5.6. +

+

+ If the [NoInterfaceObject] + extended attribute was not specified on the interface, then + the interface prototype object MUST + also have a property named “constructor” with attributes + { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true } whose value + is a reference to the interface object for the interface. +

+ +

+ The interface prototype object + for a given interface A MUST have an internal + [[Prototype]] property whose value is returned from + the following steps: +

+
    +
  1. If A is declared with the [Global] + or [PrimaryGlobal] + extended attribute, and A + supports named properties, then + return the named properties object + for A, as defined in section 4.5.5 + .
  2. +
  3. Otherwise, if A is declared to inherit from another + interface, then return the + interface prototype object + for the inherited interface.
  4. + +
  5. Otherwise, return %ObjectPrototype% ( + [ECMA-262] + , section 6.1.7.4).
  6. +
+
Note
+

+ The interface prototype object + of an interface that is defined with + the [NoInterfaceObject] + extended attribute + will be accessible if the interface is used as a + non-supplemental interface. + For example, with the following IDL: +

+
IDL
[NoInterfaceObject]
+interface Foo {
+};
+
+partial interface Window {
+  attribute Foo foo;
+};
+

+ it is not possible to access the interface prototype object through + the interface object + (since it does not exist as window.Foo). However, an instance + of Foo can expose the interface prototype + object by getting its internal [[Prototype]] + property value – Object.getPrototypeOf(window.foo) in + this example. +

+

+ If the interface is used solely as a + supplemental interface, + then there will be no way to access its interface prototype object, since no + object will have the interface prototype object as its internal + [[Prototype]] property value. In such cases, + it is an acceptable optimization for this object not to exist. +

+
+ + +

+ The class string of an + interface prototype object + is the concatenation of the interface’s + identifier and the string + “Prototype”. +

+
+ +
+

4.5.5 Named properties object

+ +

+ For every interface declared with the + [Global] or + [PrimaryGlobal] + extended attribute + that supports named properties, + there MUST exist an object known as the + named properties object for that + interface. +

+

+ The named properties object + for a given interface A MUST have an internal + [[Prototype]] property whose value is returned from + the following steps: +

+
    +
  1. If A is declared to inherit from another interface, then return the + interface prototype object + for the inherited interface.
  2. + +
  3. Otherwise, return %ObjectPrototype% ( + [ECMA-262] + , section 6.1.7.4).
  4. +
+

+ The class string of a + named properties object + is the concatenation of the interface’s + identifier and the string + “Properties”. +

+ +
+
4.5.5.1 Named properties object [[GetOwnProperty]] method
+ +

+ The internal [[GetOwnProperty]] method of every + named properties object + MUST behave as follows when called with object O + and property name P: +

+ +
    +
  1. Let A be the interface for the + named properties object O.
  2. +
  3. Let object be the sole object from O’s ECMAScript global environment that implements A. +
    Note
    +

    For example, if the interface is the Window + interface as defined in HTML5 ([HTML5], section 5.2), then the sole object + will be this global environment’s window object.

    +
    +
  4. +
  5. If the result of running the named property visibility algorithm with + property name P and object object is true, then: +
      +
    1. Let operation be the operation used to declare the named property getter.
    2. + +
    3. Let value be an uninitialized variable.
    4. +
    5. If operation was defined without an identifier, then + set value to the result of performing the steps listed in the interface description to + determine the value of a named property + with P as the name.
    6. +
    7. Otherwise, operation was defined with an identifier. Set value to the result + of performing the steps listed in the description of operation with P as the only argument value.
    8. + +
    9. Let desc be a newly created Property Descriptor ( + [ECMA-262] + , section 6.2.4) with no fields.
    10. +
    11. Set desc.[[Value]] to the result of converting + value to an ECMAScript value.
    12. +
    13. If the named property is defined to be unenumerable, + then set desc.[[Enumerable]] to false, + otherwise set it to true.
    14. +
    15. Set desc.[[Writable]] to true and + desc.[[Configurable]] to true.
    16. +
    17. Return desc.
    18. +
    +
  6. + +
  7. Return the result of calling the default [[GetOwnProperty]] internal method ( + [ECMA-262] + , section 9.1.5) on O passing P as the argument.
  8. +
+
+ +
+
4.5.5.2 Named properties object [[DefineOwnProperty]] method
+ +

+ The internal [[DefineOwnProperty]] method of every + named properties object + MUST behave as follows when called with object O + and property name P. The term “Reject” is used as defined in + section . +

+ +
    +
  1. Reject.
  2. +
+
+ +
+
4.5.5.3 Named properties object [[Delete]] method
+ +

+ The internal [[Delete]] method of every + named properties object + MUST behave as follows when called with object O and + property name P. +

+ +
    +
  1. Return false.
  2. +
+
+
+ + + +
+

4.5.6 Constants

+ +

+ For each exposed + constant defined on + an interface A, there + MUST be a corresponding property. + The property has the following characteristics: +

+
    +
  • The name of the property is the identifier of the constant.
  • +
  • + The location of the property is determined as follows: + +
  • +
  • The value of the property is that which is obtained by converting the constant’s IDL value to an ECMAScript value.
  • +
  • The property has attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }.
  • +
+

+ In addition, a property with the same characteristics MUST + exist on the interface object, if + that object exists. +

+
+ +
+

4.5.7 Attributes

+ +

+ For each exposed + attribute of the + interface, whether it + was declared on the interface itself or one of its + consequential interfaces, + there MUST exist a corresponding property. + The characteristics of this property are as follows: +

+
    +
  • + The name of the property is the identifier of the attribute. +
  • +
  • + The location of the property is determined as follows: + +
  • +
  • + The property has attributes { [[Get]]: G, [[Set]]: S, [[Enumerable]]: true, [[Configurable]]: configurable }, + where: + +
  • +
  • + The attribute getter is a Function + object whose behavior when invoked is as follows: +
      +
    1. Let idlValue be an IDL value determined as follows.
    2. +
    3. If the attribute is a regular attribute, then: +
        +
      1. Let I be the interface + whose interface prototype object + this property corresponding to the attribute appears on. +
        Note
        +

        This means that even if an implements statement was used to make + an attribute available on the interface, I is the interface + on the left hand side of the implements statement, and not the one + that the attribute was originally declared on.

        +
        +
      2. +
      3. Let O be the this value.
      4. +
      5. If O is a platform object, + then perform a security check on O + with the ECMAScript global environment associated with this Function that + implements the attribute getter.
      6. +
      7. If O is not a platform object that implements I, then: +
          +
        1. If the attribute was specified with the + [LenientThis] extended attribute, + then return undefined.
        2. +
        3. Otherwise, throw a TypeError.
        4. +
        +
      8. +
      9. + Set idlValue to be the result of performing the actions listed in the description of the attribute that occur when getting + (or those listed in the description of the inherited attribute, if this attribute is declared to + inherit its getter), + with O as the object. +
      10. +
      +
    4. +
    5. Otherwise, the attribute is a static attribute. + Set idlValue to be the result of performing the actions listed in the description of the attribute that occur when getting.
    6. +
    7. + Let V be the result of converting + idlValue to an ECMAScript value. +
    8. +
    9. + Return V. +
    10. +
    + The value of the Function object’s “length” + property is the Number value 0. +
  • +
  • + The attribute setter is undefined + if the attribute is declared readonly and has neither a + [PutForwards] nor a [Replaceable] + extended attribute declared on it. + Otherwise, it is a Function object whose behavior when invoked is as follows: +
      +
    1. If no arguments were passed to the Function, then + throw a TypeError.
    2. +
    3. Let V be the value of the first argument passed to the Function.
    4. +
    5. If the attribute is a regular attribute, then: +
        +
      1. Let I be the interface + whose interface prototype object + this property corresponding to the attribute appears on.
      2. +
      3. Let O be the this value.
      4. +
      5. If O is a platform object, + then perform a security check on O + with the ECMAScript global environment associated with this Function that + implements the attribute setter.
      6. +
      7. Let validThis be true if O is a + platform object that implements I, or + false otherwise.
      8. +
      9. If validThis is false and the + attribute was not specified with the + [LenientThis] extended attribute, + then throw a TypeError.
      10. +
      11. If the attribute is declared with a [Replaceable] + extended attribute, then: +
          +
        1. Let P be the identifier of the attribute.
        2. +
        3. Call the [[DefineOwnProperty]] method of O + passing property name P, Property Descriptor + { [[Value]]: V, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true }, + and false.
        4. +
        5. Return undefined.
        6. +
        +
      12. +
      13. If validThis is false, then return undefined.
      14. +
      15. If the attribute is declared with a [PutForwards] + extended attribute, then: +
          +
        1. Let Q be the result of calling the [[Get]] method + on O using the identifier of the attribute as the property name.
        2. +
        3. If Q is not an object, then throw a TypeError.
        4. +
        5. Let A be the attribute identified by the [PutForwards] extended attribute.
        6. +
        7. Call the [[Put]] method on Q + using the identifier of A as the property name and V as the value.
        8. +
        9. Return undefined.
        10. +
        +
      16. +
      +
    6. +
    7. Let idlValue be an IDL value determined as follows: +
        +
      • If the type of the attribute is an enumeration, then: +
          +
        1. Let S be the result of calling ToString(V).
        2. +
        3. If S is not one of the enumeration’s values, then return undefined.
        4. +
        5. The value of idlValue is the enumeration value equal to S.
        6. +
        +
      • +
      • Otherwise, the type of the attribute is not an enumeration. + The value of idlValue is the result of converting + V to an IDL value.
      • +
    8. +
    9. If the attribute is a regular attribute, then perform the actions listed in the description of the attribute that occur when setting, + with O as the object and idlValue as the value.
    10. +
    11. Otherwise, the attribute is a static attribute. + Perform the actions listed in the description of the attribute that occur when setting with idlValue as the value.
    12. +
    13. Return undefined.
    14. +
    + The value of the Function object’s “length” + property is the Number value 1. +
  • +
+
Note
+

+ Although there is only a single property for an IDL attribute, since + accessor property getters and setters are passed a this + value for the object on which the property corresponding to the IDL attribute is + accessed, they are able to expose instance-specific data. +

+
+
Note
+

+ Note that attempting to assign to a property corresponding to a + read only attribute + results in different behavior depending on whether the script doing so is in strict mode. + When in strict mode, such an assignment will result in a TypeError + being thrown. When not in strict mode, the assignment attempt will be ignored. +

+
+
+ +
+

4.5.8 Operations

+ +

+ For each unique identifier + of an exposed operation + defined on the interface, there + MUST exist a corresponding property, + unless the effective overload set + for that identifier and operation + and with an argument count of 0 has no entries. + The characteristics of this property are as follows: +

+
    +
  • The name of the property is the identifier.
  • +
  • + The location of the property is determined as follows: + +
  • +
  • + The property has attributes + { [[Writable]]: B, [[Enumerable]]: true, [[Configurable]]: B }, + where B is false if the operation is + unforgeable on the interface, + and true otherwise. +
  • +
  • + The value of the property is a Function object whose + behavior is as follows, + assuming id is the + identifier, + arg0..n−1 is the list + of argument values passed to the function: +
      +
    1. + Try running the following steps: +
        +
      1. + Let I be the interface + whose interface prototype object + (or interface object, for a static + operation) this property corresponding to the operation appears on. +
        Note
        +

        This means that even if an implements statement was used to make + an operation available on the interface, I is the interface + on the left hand side of the implements statement, and not the one + that the operation was originally declared on.

        +
        +
      2. +
      3. + Let O be a value determined as follows: +
          +
        • + If the operation is a static operation, then O is null. +
        • + +
        • + Otherwise, if the this value is not null, + then O is the this value. +
        • +
        • + Otherwise, throw a TypeError. +
        • +
        +
      4. +
      5. If O is a platform object, + then perform a security check on O + with the ECMAScript global environment associated with this Function that + implements the operation.
      6. +
      7. + If O is not null and is also not a platform object + that implements interface I, throw a TypeError. +
      8. +
      9. + Initialize S to the + effective overload set + for regular operations + (if the operation is a regular operation) or for + static operations + (if the operation is a static operation) with + identifier + id on interface + I and with argument count n. +
      10. +
      11. + Let <operation, values> be the result of passing S and + arg0..n−1 to the + overload resolution algorithm. +
      12. +
      13. + Let R be the result of performing (on O, if the operation + is not a static operation) the actions listed in the description of + operation with values as the argument values. +
      14. +
      15. + Return the result of converting + R to an ECMAScript value of + the type operation is declared to return. +
      16. +
      + And then, if an exception was thrown: +
        +
      1. If the operation has a return type + that is a promise type, then: +
          +
        1. Let reject be the initial value of %Promise%.reject.
        2. +
        3. Return the result of calling reject with %Promise% as the + this object and the exception as the single + argument value.
        4. +
        +
      2. +
      3. Otherwise, end these steps and allow the exception to propagate.
      4. +
      +
    2. +
    +
  • +
  • + The value of the Function object’s “length” + property is a Number determined as follows: +
      +
    1. + Let S be the + effective overload set + for regular operations + (if the operation is a regular operation) or for + static operations + (if the operation is a static operation) with + identifier + id on interface + I and with argument count 0. +
    2. +
    3. + Return the length of the shortest argument list of the entries in S. +
    4. +
    +
  • +
+ +
+
4.5.8.1 Stringifiers
+ +

+ If the interface + has an exposed + stringifier, then + there MUST exist a property with + the following characteristics: +

+
    +
  • The name of the property is “toString”.
  • +
  • If the stringifier is + unforgeable on the interface + or if the interface was declared with the [Global] or [PrimaryGlobal] extended attribute, + then the property exists on every object that implements the interface. + Otherwise, the property exists on the interface prototype object.
  • +
  • The property has attributes { [[Writable]]: B, [[Enumerable]]: true, [[Configurable]]: B }, + where B is false if the stringifier is + unforgeable on the interface, + and true otherwise.
  • +
  • +

    The value of the property is a Function object, which behaves as follows:

    +
      +
    1. Let O be the result of calling ToObject on the this value.
    2. +
    3. If O is a platform object, + then perform a security check on O + with the ECMAScript global environment associated with this Function that + implements the stringifier.
    4. +
    5. If O is not an object that implements the interface + on which the stringifier was declared, then throw a TypeError.
    6. +
    7. Let V be an uninitialized variable.
    8. +
    9. Depending on where stringifier was specified: +
      +
      on an attribute
      +
      Set V to the result of performing the actions listed in the description of the attribute that occur when getting + (or those listed in the description of the inherited attribute, if this attribute is declared to + inherit its getter), + with O as the object.
      +
      on an operation with an identifier
      +
      Set V to the result of performing the actions listed in the description + of the operation, using O as the this value + and passing no arguments.
      +
      on an operation with no identifier
      +
      Set V to the result of performing the stringification behavior + of the interface.
      +
      +
    10. +
    11. Return the result of converting V to a String value.
    12. +
    +

    The value of the Function object’s “length” + property is the Number value 0.

    +
  • +
+
+ +
+
4.5.8.2 Serializers
+ +

+ If the interface + has an exposed + serializer, then + a property MUST exist + whose name is “toJSON”, + with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } + and whose value is a + Function object. +

+

+ The location of the property is determined as follows: +

+ +

+ The property’s Function object, when invoked, + MUST behave as follows: +

+
    +
  1. Let O be the result of calling ToObject on the this value.
  2. +
  3. If O is a platform object, + then perform a security check on O + with the ECMAScript global environment associated with this Function that + implements the serializer.
  4. +
  5. If O is not an object that implements the interface + on which the serializer was declared, then throw a TypeError.
  6. +
  7. Depending on how serializer was specified: +
    +
    on an operation with an identifier
    +
    +
      +
    1. Return the result of performing the actions listed in the description of the + operation, using O as the this value + and passing no arguments.
    2. +
    +
    +
    as a keyword, either with or without a serialization pattern
    +
    +
      +
    1. Let S be the serialized value that is the result of invoking the serialization behavior of the + interface for object O.
    2. +
    3. Return the result of converting + S to an ECMAScript value.
    4. +
    +
    +
    +
  8. +
+

+ The following steps define how to convert a serialized value to an ECMAScript value: +

+
    +
  1. Let S be the serialized value.
  2. +
  3. Depending on the type of S: +
    +
    a map
    +
    +
      +
    1. Let O be a new object created as if by the expression ({}).
    2. +
    3. For each entry in S, in the order they were added to the map: +
        +
      1. Let V be the result of converting + the value of the entry to an ECMAScript value.
      2. +
      3. Let P be the entry’s key.
      4. +
      5. Call the [[DefineOwnProperty]] internal method of O passing + property name P, Property Descriptor { [[Value]]: V, + [[Writable]]: true, [[Enumerable]]: true, + [[Configurable]]: true }, and false + as arguments.
      6. +
      +
    4. +
    5. Return O.
    6. +
    +
    +
    a list
    +
    +
      +
    1. Let A be a new Array object created as if by the expression [].
    2. +
    3. Let index be 0.
    4. +
    5. While index is less than the number of elements in S: +
        +
      1. Let V be the result of converting + the value of the element in S at index index to an ECMAScript value.
      2. +
      3. Let P be ToString(index).
      4. +
      5. Call the [[DefineOwnProperty]] internal method of A passing + property name P, Property Descriptor { [[Value]]: V, + [[Writable]]: true, [[Enumerable]]: true, + [[Configurable]]: true }, and false + as arguments.
      6. +
      +
    6. +
    7. Return A.
    8. +
    +
    +
    any other serialized value
    +
    +
      +
    1. Let V be the result of converting + S to an ECMAScript value.
    2. +
    3. Return V.
    4. +
    +
    +
    +
  4. +
+
+
+ +
+

4.5.9 Common iterator behavior

+ +
+
4.5.9.1 @@iterator
+ +

+ If the interface + has any of the following: +

+ +

+ then a property MUST exist + whose name is the @@iterator symbol, + with attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true } + and whose value is a function object. +

+

+ The location of the property is determined as follows: +

+ +

+ If the interface has an iterable declaration, + then the Function, when invoked, + MUST behave as follows: +

+
    +
  1. Let object be the result of calling ToObject on the this value.
  2. +
  3. If object is a platform object, + then perform a security check on object + with the ECMAScript global environment associated with this Function.
  4. +
  5. Let interface be the interface + the iterable declaration is on.
  6. +
  7. If object is not a platform object + that implements interface, + then throw a TypeError.
  8. +
  9. Let iterator be a newly created default iterator object + for interface with object as its target and iterator kind “value”.
  10. +
  11. Return iterator.
  12. +
+

+ If the interface does not have an iterable declaration + but does define an indexed property getter, + then the Function object is %ArrayProto_values% ( + [ECMA-262] + , section 6.1.7.4). +

+ + +

+ The value of the @@iterator Function object’s “length” + property is the Number value 0. +

+
+ +
+
4.5.9.2 forEach
+ +

+ If the interface + has any of the following: +

+ +

+ then a property named “forEach” MUST exist + with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } + and whose value is a function object. +

+

+ The location of the property is determined as follows: +

+ +

+ If the interface has an iterable declaration, + then the Function MUST + have the same behavior as one that would exist assuming the interface had + this operation instead of the + iterable declaration: +

+
IDL
void forEach(Function callback, optional any thisArg = undefined);
+

+ with the following prose definition: +

+
    +
  1. Let values be the list of values to iterate over.
  2. +
  3. Let len be the length of values.
  4. +
  5. Initialize k to 0.
  6. +
  7. While k < len: +
      +
    1. Let kValue be the value in values at index k.
    2. +
    3. Invoke callback with thisArg + as the callback this value and + k and kValue as its arguments.
    4. +
    +
  8. +
+ + +

+ The value of the Function object’s “length” + property is the Number value 1. +

+
+
+ +
+

4.5.10 Iterable declarations

+ +
+
4.5.10.1 entries
+ +

+ If the interface has an + iterable declaration, + then a property named “entries” MUST exist + with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } + and whose value is a function object. +

+

+ The location of the property is determined as follows: +

+ +

+ The Function, when invoked, MUST behave as follows: +

+
    +
  1. Let object be the result of calling ToObject on the this value.
  2. +
  3. If object is a platform object, + then perform a security check on object + with the ECMAScript global environment associated with this Function.
  4. +
  5. Let interface be the interface + on which the iterable declaration is declared.
  6. +
  7. If object is not a platform object + that implements interface, + then throw a TypeError.
  8. +
  9. Let iterator be a newly created default iterator object + for interface with object as its target and iterator kind “key+value”.
  10. +
  11. Return iterator.
  12. +
+

The value of the Function object’s “length” property is the Number value 0.

+
+ +
+
4.5.10.2 keys
+ +

+ If the interface has an + iterable declaration, + then a property named “keys” MUST exist + with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } + and whose value is a function object. +

+

+ The location of the property is determined as follows: +

+ +

+ The Function, when invoked, MUST behave as follows: +

+
    +
  1. Let object be the result of calling ToObject on the this value.
  2. +
  3. If object is a platform object, + then perform a security check on object + with the ECMAScript global environment associated with this Function.
  4. +
  5. Let interface be the interface + on which the iterable declaration is declared.
  6. +
  7. If object is not a platform object + that implements interface, + then throw a TypeError.
  8. +
  9. Let iterator be a newly created default iterator object + for interface with object as its target and iterator kind “key”.
  10. +
  11. Return iterator.
  12. +
+

The value of the Function object’s “length” property is the Number value 0.

+
+ +
+
4.5.10.3 values
+ +

+ If the interface has an + iterable declaration, + then a property named “values” MUST exist + with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } + and whose value is the function object + that is the value of the @@iterator property. +

+

+ The location of the property is determined as follows: +

+ +

The value of the Function object’s “length” property is the Number value 0.

+
+ +
+
4.5.10.4 Default iterator objects
+ +

+ A default iterator object for a given + interface, target and iteration kind + is an object whose internal [[Prototype]] property is the + iterator prototype object + for the interface. +

+

+ A default iterator object + has three internal values: +

+
    +
  1. its target, which is an object whose values are to be iterated,
  2. +
  3. its kind, which is the iteration kind,
  4. +
  5. its index, which is the current index into the values to be iterated.
  6. +
+

+ When a default iterator object is first created, + its index is set to 0. +

+

+ The class string of a + default iterator object + for a given interface + is the result of concatenating the identifier + of the interface and + the string “ Iterator”. +

+
+ +
+
4.5.10.5 Iterator prototype object
+ +

+ The iterator prototype object + for a given interface + is an object that exists for every interface that has an + iterable declaration. It serves as the + prototype for default iterator objects + for the interface. +

+

+ The internal [[Prototype]] property of an iterator prototype object + MUST be %IteratorPrototype% ( + [ECMA-262] + , section 6.1.7.4). +

+

+ An iterator prototype object + MUST have a property named “next” with + attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } + and whose value is a function object + that behaves as follows: +

+
    +
  1. Let interface be the interface for which the + iterator prototype object exists.
  2. +
  3. Let object be the result of calling ToObject on the this value.
  4. +
  5. If object is a platform object, + then perform a security check on object + with the ECMAScript global environment associated with this Function that + implements the iterator’s next method.
  6. +
  7. If object is not a default iterator object for interface, + then throw a TypeError.
  8. +
  9. Let target be object’s target.
  10. +
  11. Let index be object’s index.
  12. +
  13. Let kind be object’s kind.
  14. +
  15. Let values be the list of values to iterate over. +
    Note
    +

    Depending on whether prose accompanying the interface defined this to be a snapshot at the time + iteration begins, the list of values might be different from the previous time the next + method was called on this iterator object.

    +
    +
  16. +
  17. Let len be the length of values.
  18. +
  19. If object’s index is greater than or equal to len, then + return CreateIterResultObject(undefined, true).
  20. +
  21. Let result be a value determined by the value of kind: +
    +
    key
    +
    +
      +
    1. Let key be the ECMAScript Number value index.
    2. +
    3. result is key.
    4. +
    +
    +
    value
    +
    +
      +
    1. Let idlValue be the value in values at index index.
    2. +
    3. Let value be the result of converting idlValue to an ECMAScript value.
    4. +
    5. result is value.
    6. +
    +
    +
    key+value
    +
    +
      +
    1. Let key be the ECMAScript Number value index.
    2. +
    3. Let idlValue be the value in values at index index.
    4. +
    5. Let value be the result of converting idlValue to an ECMAScript value.
    6. +
    7. Let array be the result of performing ArrayCreate(2).
    8. +
    9. Call CreateDataProperty(array, "0", key).
    10. +
    11. Call CreateDataProperty(array, "1", value).
    12. +
    13. result is array.
    14. +
    +
    +
    +
  22. +
  23. Return CreateIterResultObject(result, false).
  24. +
+

+ The class string of an + iterator prototype object + for a given interface + is the result of concatenating the identifier + of the interface and + the string “Iterator”. +

+
+
+ + + + + +
+

4.5.11 Initializing objects from iterables

+ +

+ Some objects, which are attempting to emulate map- and set-like interfaces, will want to accept iterables + as constructor parameters and initialize themselves in this way. Here we provide some algorithms that can + be invoked in order to do so in the same way as in the ECMAScript spec, so that those objects behave + the same as the built-in Map and Set objects. +

+ +

+ To add map elements from an iterable iterable to + an object destination with adder method name adder, perform the following steps: +

+
    +
  1. If Type(destination) is not Object, then, throw a TypeError exception.
  2. +
  3. If iterable is not present, let iterable be undefined.
  4. +
  5. If iterable is either undefined or null, then let iter be undefined.
  6. +
  7. Else, +
      +
    1. Let adder be the result of Get(destination, adder).
    2. +
    3. ReturnIfAbrupt(adder).
    4. +
    5. If IsCallable(adder) is false, throw a TypeError exception.
    6. +
    7. Let iter be the result of GetIterator(iterable).
    8. +
    9. ReturnIfAbrupt(iter).
    10. +
    +
  8. +
  9. If iter is undefined, then return.
  10. +
  11. Repeat +
      +
    1. Let next be the result of IteratorStep(iter).
    2. +
    3. ReturnIfAbrupt(next).
    4. +
    5. If next is false, then return NormalCompletion(destination).
    6. +
    7. Let nextItem be IteratorValue(next).
    8. +
    9. ReturnIfAbrupt(nextItem).
    10. +
    11. If Type(nextItem) is not Object, then throw a TypeError exception.
    12. +
    13. Let k be the result of Get(nextItem, '0').
    14. +
    15. ReturnIfAbrupt(k).
    16. +
    17. Let v be the result of Get(nextItem, '1').
    18. +
    19. ReturnIfAbrupt(v).
    20. +
    21. Let status be the result of calling the [[Call]] internal method of adder with destination as + thisArgument and (k, v) as argumentsList.
    22. +
    23. ReturnIfAbrupt(status).
    24. +
    +
  12. +
+
+
+ +
+

4.6 Implements statements

+ +

+ The interface prototype object + of an interface A MUST have a copy of + each property that corresponds to one of the + constants, + attributes, + operations and + iterable declarations + that exist on all of the interface prototype objects of A’s + consequential interfaces. + For operations, where the property is a data property with a Function + object value, each copy of the property MUST have + distinct Function objects. For attributes, each + copy of the accessor property MUST have + distinct Function objects for their getters, + and similarly with their setters. +

+
Note
+

+ When invoking an operation by calling + a Function object that is the value of one of the copies that exists + due to an implements statement, the this value is + checked to ensure that it is an object that implements the + interface corresponding to the + interface prototype object + that the property is on. +

+

+ For example, consider the following IDL: +

+
IDL
interface A {
+  void f();
+};
+
+interface B { };
+B implements A;
+
+interface C { };
+C implements A;
+

+ Attempting to call B.prototype.f on an object that implements + A (but not B) or one + that implements C will result in a + TypeError being thrown. However, + calling A.prototype.f on an object that implements + B or one that implements C + would succeed. This is handled by the algorithm in section 4.5.8 + that defines how IDL operation invocation works in ECMAScript. +

+

+ Similar behavior is required for the getter and setter Function + objects that correspond to an IDL attribute, + and this is handled in section 4.5.7. +

+
+
+ +
+

4.7 Platform objects implementing interfaces

+ +

+ Every platform object is associated with a global environment, just + as the initial objects are. + It is the responsibility of specifications using Web IDL to state + which global environment (or, by proxy, which global object) each platform + object is associated with. +

+

+ The primary interface of a platform object + that implements one or more interfaces is the most-derived non-supplemental interface + that it implements. The value of the internal [[Prototype]] + property of the platform object is the interface prototype object + of the primary interface + from the platform object’s associated global environment. +

+

+ The global environment that a given platform object + is associated with can change after it has been created. When + the global environment associated with a platform object is changed, its internal + [[Prototype]] property MUST be immediately + updated to be the interface prototype object + of the primary interface + from the platform object’s newly associated global environment. +

+ + + +

+ Every platform object that implements an [Unforgeable]-annotated + interface and which does not have a stringifier + that is unforgeable on any of the + interfaces it implements MUST have a property with the + following characteristics: +

+
    +
  • The name of the property is “toString”.
  • +
  • The property has attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }.
  • +
  • The value of the property is %ObjProto_toString% ( + [ECMA-262] + , section 6.1.7.4), the initial value of Object.prototype.toString.
  • +
+ +

+ Every platform object that implements an [Unforgeable]-annotated + interface and which does not have a serializer + that is unforgeable on any of the + interfaces it implements MUST have a property with the + following characteristics: +

+
    +
  • The name of the property is “toJSON”.
  • +
  • The property has attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }.
  • +
  • The value of the property is undefined.
  • +
+ +

+ Every platform object that implements an [Unforgeable]-annotated + interface MUST have a property with the + following characteristics: +

+
    +
  • The name of the property is “valueOf”.
  • +
  • The property has attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }.
  • +
  • + The value of the property is a Function object whose behavior + is as follows: +
      +
    1. Return the this value.
    2. +
    + This Function object is the + default unforgeable valueOf function. + The value of the Function object’s “length” + property is the Number value 0. +
  • +
+ +

+ The class string of + a platform object that implements one or more interfaces + MUST be the identifier of + the primary interface + of the platform object. +

+ +
+

4.7.1 Indexed and named properties

+ +

+ If a platform object implements an interface that + supports indexed or + named properties, + the object will appear to have additional properties that correspond to the + object’s indexed and named properties. These properties are not “real” own + properties on the object, but are made to look like they are by being exposed + by the [[GetOwnProperty]] internal method. +

+

+ However, when the [Global] or + [PrimaryGlobal] + extended attribute has been used, + named properties are not exposed on the object but on another object + in the prototype chain, the named properties object. +

+

+ It is permissible for an object to implement multiple interfaces that support indexed properties. + However, if so, and there are conflicting definitions as to the object’s + supported property indices, + or if one of the interfaces is a supplemental interface for the + platform object, then it is undefined what additional properties the object will appear to + have, or what its exact behavior will be with regard to its indexed properties. + The same applies for named properties. +

+

+ The indexed property getter + that is defined on the derived-most interface that the + platform object implements is the one that defines the behavior + when indexing the object with an array index. Similarly for + indexed property setters. + This way, the definitions of these special operations from + ancestor interfaces can be overridden. +

+ +

+ Platform objects implementing an interface that supports indexed or named properties cannot be fixed; if Object.freeze, Object.seal + or Object.preventExtensions is called on one of these objects, the function + MUST throw a TypeError. + Similarly, an interface prototype object + that exposes named properties due to the use of [Global] or + [PrimaryGlobal] + also MUST throw a TypeError + if one of the three functions above is called on it. +

+ +

+ The name of each property that appears to exist due to an object supporting indexed properties + is an array index property name, which is a property + name P such that Type(P) is String + and for which the following algorithm returns true: +

+
    +
  1. Let i be ToUint32(P).
  2. +
  3. Let s be ToString(i).
  4. +
  5. If s ≠ P or i = 2^32 − 1, then return false.
  6. +
  7. Return true.
  8. +
+ + +

+ A property name is an unforgeable property name on a + given platform object if the object implements an interface that + has an interface member with that identifier + and that interface member is unforgeable on any of + the interfaces that the object implements. If the object implements an + [Unforgeable]-annotated + interface, then “toString” and “valueOf” are + also unforgeable property names + on that object. +

+

+ The named property visibility algorithm is used to determine if + a given named property is exposed on an object. Some named properties are not exposed on an object + depending on whether the [OverrideBuiltins] + extended attribute was used. The algorithm + operates as follows, with property name P and object O: +

+ +
    +
  1. If P is an unforgeable property name + on O, then return false.
  2. +
  3. If O implements an interface with + an [Unforgeable]-annotated attribute + whose identifier is P, then return false.
  4. +
  5. If P is not a supported property name + of O, then return false.
  6. +
  7. If O implements an interface that has the [OverrideBuiltins] + extended attribute, then return true.
  8. +
  9. If O has an own property named P, then return false.
  10. +
  11. Initialize prototype to be the value of the internal [[Prototype]] property of O.
  12. +
  13. While prototype is not null: +
      +
    1. If prototype is not a named properties object, + and prototype has an own property named P, then return false.
    2. +
    3. Set prototype to be the value of the internal [[Prototype]] property of prototype.
    4. +
    +
  14. +
  15. Return true.
  16. +
+
Note
+

This should ensure that for objects with named properties, property resolution is done in the following order:

+
    +
  1. Indexed properties.
  2. +
  3. Unforgeable attributes and operations.
  4. +
  5. Then, if [OverrideBuiltins]: +
      +
    1. Named properties.
    2. +
    3. Own properties.
    4. +
    5. Properties from the prototype chain.
    6. +
    +
  6. +
  7. Otherwise, if not [OverrideBuiltins]: +
      +
    1. Own properties.
    2. + +
    3. Properties from the prototype chain.
    4. + +
    5. Named properties.
    6. +
    +
  8. +
+
+

+ Support for getters is + handled by the platform object [[GetOwnProperty]] method + defined in section 4.7.3, and + for setters + by the platform object [[DefineOwnProperty]] method + defined in section 4.7.7 and the platform object [[Set]] method + defined in section 4.7.6. +

+
+ +
+

4.7.2 The PlatformObjectGetOwnProperty abstract operation

+ +

+ The PlatformObjectGetOwnProperty + abstract operation performs the following steps when called with an + object O, a property name P, and a boolean + ignoreNamedProps value: +

+ +
    +
  1. + If O supports indexed properties + and P is an array index property name, then: +
      +
    1. Let index be the result of calling ToUint32(P).
    2. +
    3. If index is a supported property index, then: +
        +
      1. Let operation be the operation used to declare the indexed property getter.
      2. + +
      3. Let value be an uninitialized variable.
      4. +
      5. If operation was defined without an identifier, then + set value to the result of performing the steps listed in the interface description to + determine the value of an indexed property + with index as the index.
      6. +
      7. Otherwise, operation was defined with an identifier. Set value to the result + of performing the steps listed in the description of operation with index as the only argument value.
      8. + +
      9. Let desc be a newly created Property Descriptor ( + [ECMA-262] + , section 6.2.4) with no fields.
      10. +
      11. Set desc.[[Value]] to the result of converting + value to an ECMAScript value.
      12. +
      13. If O implements an interface with an indexed property setter, then set + desc.[[Writable]] to true, otherwise set it to + false.
      14. +
      15. Set desc.[[Enumerable]] and desc.[[Configurable]] to true.
      16. +
      17. Return desc.
      18. +
      +
    4. +
    5. Set ignoreNamedProps to true.
    6. +
    +
  2. + +
  3. If O supports named properties, O does not + implement an interface with the [Global] or [PrimaryGlobal] + extended attribute, the result of running the named property visibility algorithm with + property name P and object O is true, and ignoreNamedProps is false, then: +
      +
    1. Let operation be the operation used to declare the named property getter.
    2. + +
    3. Let value be an uninitialized variable.
    4. +
    5. If operation was defined without an identifier, then + set value to the result of performing the steps listed in the interface description to + determine the value of a named property + with P as the name.
    6. +
    7. Otherwise, operation was defined with an identifier. Set value to the result + of performing the steps listed in the description of operation with P as the only argument value.
    8. + +
    9. Let desc be a newly created Property Descriptor ( + [ECMA-262] + , section 6.2.4) with no fields.
    10. +
    11. Set desc.[[Value]] to the result of converting + value to an ECMAScript value.
    12. +
    13. If O implements an interface with a named property setter, then set + desc.[[Writable]] to true, otherwise set it to + false.
    14. +
    15. If the named property is defined to be unenumerable, + then set desc.[[Enumerable]] to false, + otherwise set it to true.
    16. +
    17. Set desc.[[Configurable]] to true.
    18. +
    19. Return desc.
    20. +
    +
  4. + +
  5. Return the result of calling the default [[GetOwnProperty]] internal method ( + [ECMA-262] + , section 9.1.5) on O passing P as the argument.
  6. +
+
+ +
+

4.7.3 Platform object [[GetOwnProperty]] method

+ +

+ The internal [[GetOwnProperty]] method of every + platform object O that implements an interface + which supports indexed or + named properties + MUST behave as follows when called with property name P: +

+ +
    +
  1. + Return the result of invoking the PlatformObjectGetOwnProperty + abstract operation with + O, P, and false as + arguments. +
  2. +
+
+ +
+

4.7.4 Invoking a platform object indexed property setter

+

+ To invoke an indexed property + setter with property name P and ECMAScript value + V, the following steps MUST be performed: +

+
    +
  1. Let index be the result of calling ToUint32(P).
  2. +
  3. Let creating be true if index is not a supported property index, and false otherwise.
  4. +
  5. Let operation be the operation used to declare the indexed property setter.
  6. +
  7. Let T be the type of the second argument of operation.
  8. +
  9. Let value be the result of converting V to an IDL value of type T.
  10. +
  11. If operation was defined without an identifier, then: +
      +
    1. If creating is true, then perform the steps listed in the interface description to + set the value of a new indexed property + with index as the index and value as the value.
    2. +
    3. Otherwise, creating is false. Perform the steps listed in the interface description to + set the value of an existing indexed property + with index as the index and value as the value.
    4. +
    +
  12. +
  13. Otherwise, operation was defined with an identifier. Perform the steps listed in the description of + operation with index and value as the two argument values.
  14. +
+
+ +
+

4.7.5 Invoking a platform object named property setter

+

+ To invoke a named property + setter with property name P and ECMAScript value + V, the following steps MUST be performed: +

+
    +
  1. Let creating be true if P is not a supported property name, and false otherwise.
  2. +
  3. Let operation be the operation used to declare the named property setter.
  4. +
  5. Let T be the type of the second argument of operation.
  6. +
  7. Let value be the result of converting V to an IDL value of type T.
  8. +
  9. If operation was defined without an identifier, then: +
      +
    1. If creating is true, then perform the steps listed in the interface description to + set the value of a new named property + with P as the name and value as the value.
    2. +
    3. Otherwise, creating is false. Perform the steps listed in the interface description to + set the value of an existing named property + with P as the name and value as the value.
    4. +
    +
  10. +
  11. Otherwise, operation was defined with an identifier. Perform the steps listed in the description of + operation with P and value as the two argument values.
  12. +
+
+ +
+

4.7.6 Platform object [[Set]] method

+ +

+ The internal [[Set]] method of every + platform object O that implements an interface + which supports indexed or + named properties + MUST behave as follows when called + with property name P, value V, and + ECMAScript language value Receiver: +

+ +
    +
  1. If O and Receiver are the same object, + then: +
      +
    1. + If O supports indexed + properties, P is an array index property + name, and O implements an interface with an indexed + property setter, then: +
        +
      1. + Invoke the indexed + property setter with P and V. +
      2. +
      3. Return true.
      4. +
      +
    2. + +
    3. + If O supports named + properties, Type(P) is String, + P is not an array index property + name, and O implements an interface with a named + property setter, then: +
        +
      1. + Invoke the named + property setter with P and V. +
      2. +
      3. Return true.
      4. +
      +
    4. +
    +
  2. +
  3. + Let ownDesc be the result of invoking the PlatformObjectGetOwnProperty + abstract operation with + O, P, and true as + arguments. +
  4. +
  5. + Perform steps 3-11 of the default [[Set]] internal method ( + [ECMA-262] + , section 9.1.9). +
  6. +
+
+ +
+

4.7.7 Platform object [[DefineOwnProperty]] method

+ +

+ The internal [[DefineOwnProperty]] method of every + platform object O that implements an interface + which supports indexed or + named properties + MUST behave as follows when called with property name P, + Property Descriptor Desc and boolean flag Throw. + The term “Reject” is used as defined in + section . +

+ +
    +
  1. + If O supports indexed properties and + P is an array index property name, then: +
      +
    1. If the result of calling IsDataDescriptor(Desc) is false, then Reject.
    2. +
    3. If O does not implement an interface with an indexed property setter, then Reject.
    4. +
    5. Invoke the indexed + property setter with P and Desc.[[Value]].
    6. +
    7. Return true.
    8. +
    +
  2. + +
  3. + If O supports named properties, + O does not implement an interface with the + [Global] or [PrimaryGlobal] extended attribute + and P is not an unforgeable property name + of O, then: +
      +
    1. Let creating be true if P is not a supported property name, and false otherwise.
    2. + +
    3. If O implements an interface with the [OverrideBuiltins] + extended attribute or O does not have an own property + named P, then: +
        +
      1. If creating is false and O does not implement an interface with a named property setter, then Reject.
      2. +
      3. If O implements an interface with a named property setter, then: +
          +
        1. If the result of calling IsDataDescriptor(Desc) is false, then Reject.
        2. +
        3. + Invoke the named + property setter with P and + Desc.[[Value]]. +
        4. +
        5. Return true.
        6. +
        +
      4. +
      +
    4. +
    +
  4. + +
  5. If O does not implement an interface with the + [Global] or [PrimaryGlobal] extended attribute, + then set Desc.[[Configurable]] to true.
  6. +
  7. Call the default [[DefineOwnProperty]] internal method ( + [ECMA-262] + , section 9.1.6) on O passing P, Desc, and Throw as arguments.
  8. +
+
+ +
+

4.7.8 Platform object [[Delete]] method

+ +

+ The internal [[Delete]] method of every + platform object O that implements an interface + which supports indexed or + named properties + MUST behave as follows when called with property name P. +

+ +
    +
  1. + If O supports indexed properties and + P is an array index property name, then: +
      +
    1. Let index be the result of calling ToUint32(P).
    2. +
    3. If index is not a supported property index, then return true.
    4. +
    5. Return false.
    6. +
    +
  2. + +
  3. + If O supports named properties, + O does not implement an interface with the + [Global] or [PrimaryGlobal] extended attribute + and the result of calling the named property visibility algorithm + with property name P and object O is true, then: +
      +
    1. If O does not implement an interface with a named property deleter, then return false.
    2. +
    3. Let operation be the operation used to declare the named property deleter.
    4. +
    5. If operation was defined without an identifier, then: +
        +
      1. Perform the steps listed in the interface description to + delete an existing named property + with P as the name.
      2. +
      3. If the steps indicated that the deletion failed, then return false.
      4. +
      +
    6. +
    7. Otherwise, operation was defined with an identifier: +
        +
      1. Perform the steps listed in the description of operation with P as the only argument value.
      2. +
      3. If operation was declared with a return type of boolean + and the steps returned false, then return false.
      4. +
      +
    8. +
    9. Return true.
    10. +
    +
  4. + +
  5. If O has an own property with name P, then: +
      +
    1. If the property is not configurable, then return false.
    2. +
    3. Otherwise, remove the property from O.
    4. +
    +
  6. +
  7. Return true.
  8. +
+
+ +
+

4.7.9 Platform object [[Call]] method

+ +

+ The internal [[Call]] method of every + platform object O that implements an interface + I with at least one legacy caller + MUST behave as follows, assuming + arg0..n−1 is the list of argument + values passed to [[Call]]: +

+
    +
  1. Initialize S to the effective overload set + for legacy callers on I and with argument count n.
  2. +
  3. + Let <operation, values> be the result of passing S and + arg0..n−1 to the + overload resolution algorithm. +
  4. +
  5. Perform the actions listed in the description of the legacy caller operation with + values as the argument values.
  6. +
  7. Return the result of converting + the return value from those actions to an ECMAScript value of the type + operation is declared to return (or undefined + if operation is declared to return void).
  8. +
+
+ +
+

4.7.10 Property enumeration

+ +

+ This document does not define a complete property enumeration order + for all platform objects implementing interfaces + (or for platform objects representing exceptions). + However, if a platform object implements an interface that + supports indexed or + named properties, then + properties on the object MUST be + enumerated in the following order: +

+
    +
  1. If the object supports indexed properties, then + the object’s supported property indices are + enumerated first, in numerical order.
  2. +
  3. If the object supports named properties, then + the object’s supported property names that + are visible according to the named property visibility algorithm + and which are not stated to be unenumerable + are enumerated next, in the order given in the definition of the set of supported property names.
  4. +
  5. Finally, any enumerable own properties or properties from the object’s prototype chain are then enumerated, + in no defined order.
  6. +
+
Note
+

Future versions of the ECMAScript specification may define a total order for property enumeration.

+
+
+
+ +
+

4.8 User objects implementing callback interfaces

+ +

+ As described in section 3.9 , + callback interfaces can be + implemented in script by an ECMAScript object. + The following cases determine whether and how a given object + is considered to be a user object implementing a callback interface: +

+
    +
  • + If the interface is a single operation callback interface, then any + object is considered to implement the interface. + The implementation of the operation (or set of overloaded operations) is + as follows: +
      +
    • If the object is callable, + then the implementation of the operation (or set of overloaded operations) is + the callable object itself.
    • +
    • Otherwise, the object is not callable. + The implementation of the operation (or set of overloaded operations) is + the result of invoking the internal [[Get]] method + on the object with a property name that is the identifier + of the operation.
    • +
    +
  • +
  • + Otherwise, the interface is not a single operation callback interface. Any object is considered to implement the interface. + For each operation declared on the interface with a given identifier, the implementation + is the result of invoking [[Get]] on the object with a + property name that is that identifier. +
  • +
+

+ A single operation callback interface is + a callback interface that: +

+ +

+ A user object’s + operation is called + with a list of IDL argument values idlarg0..n−1 by + following the algorithm below. The callback this value + is the value to use as the this value + when a callable + object was supplied as the implementation of a + single operation callback interface. + By default, undefined is used as the callback this value, + however this MAY be overridden by other + specifications. +

+
    +
  1. Try running the following steps: +
      +
    1. Let V be the IDL callback interface type value + that represents the user object implementing the interface.
    2. +
    3. Let O be the ECMAScript object corresponding to V.
    4. +
    5. Let X be the implementation of the operation. If the interface is a single operation callback interface and IsCallable(O) is true, then X is O. + Otherwise, X is the result of calling + the internal [[Get]] method of O with the identifier of the operation as the property name.
    6. +
    7. If Type(X) is not Object, throw a TypeError exception.
    8. +
    9. If IsCallable(X) is false, then throw a TypeError exception.
    10. +
    11. Let this be O if the interface is not a single operation callback interface + or if IsCallable(O) is false, + and the callback this value otherwise.
    12. +
    13. + Let arg0..n−1 be a list of + ECMAScript values, where argi is the result + of converting + idlargi to an ECMAScript value. +
    14. +
    15. Let script be the callback context associated with V.
    16. +
    17. Push script on to the stack of incumbent scripts. [HTML]
    18. +
    19. Let R be an uninitialized variable.
    20. +
    21. Try running the following step: +
        +
      1. Set R to the result of invoking the [[Call]] method of X, providing this as the this value and arg0..n−1 as the argument values.
      2. +
      + And then, whether or not an exception was thrown: +
        +
      1. Pop script off the stack of incumbent scripts.
      2. +
      3. If an exception was thrown, end these steps, and allow it to propagate.
      4. +
      +
    22. +
    23. If the operation’s return type is void, return.
    24. +
    25. + Return the result of converting + R to an IDL value of the same type as the operation’s return type. +
    26. +
    + And then, if an exception was thrown: +
      +
    1. If the operation has a return type that is a promise type, then: +
        +
      1. Let reject be the initial value of %Promise%.reject.
      2. +
      3. Return the result of calling reject with %Promise% as the this object and the exception as the single argument value.
      4. +
      +
    2. +
    3. Otherwise, end these steps and allow the exception to propagate.
    4. +
    +
  2. +
+

+ Note that ECMAScript objects need not have + properties corresponding to constants + on them to be considered as user objects + implementing interfaces that happen + to have constants declared on them. +

+

+ The value of a user object’s + attribute is retrieved using the + following algorithm: +

+
    +
  1. Try running the following steps: +
      +
    1. Let V be the IDL callback interface type value + that represents the user object implementing the interface.
    2. +
    3. Let O be the ECMAScript object corresponding to V.
    4. +
    5. Let P be the identifier of the attribute.
    6. +
    7. Let script be the callback context associated with V.
    8. +
    9. Push script on to the stack of incumbent scripts. [HTML]
    10. +
    11. Let R be an uninitialized variable.
    12. +
    13. Try running the following step: +
        +
      1. Set R to the result of invoking the [[Get]] method of O with property name P.
      2. +
      + And then, whether or not an exception was thrown: +
        +
      1. Pop script off the stack of incumbent scripts.
      2. +
      3. If an exception was thrown, end these steps, and allow it to propagate.
      4. +
      +
    14. +
    15. Return the result of converting R to an IDL value of the same type as the attribute’s type.
    16. +
    + And then, if an exception was thrown: +
      +
    1. If the attribute has a type that is a promise type, then: +
        +
      1. Let reject be the initial value of %Promise%.reject.
      2. +
      3. Return the result of calling reject with %Promise% as the this object and the exception as the single argument value.
      4. +
      +
    2. +
    3. Otherwise, end these steps and allow the exception to propagate.
    4. +
    +
  2. +
+

+ The value of a user object’s + attribute is set using the + following algorithm: +

+
    +
  1. Let V be the IDL callback interface type value + that represents the user object implementing the interface.
  2. +
  3. Let O be the ECMAScript object corresponding to V.
  4. +
  5. Let P be the identifier of the attribute.
  6. +
  7. Let V be the IDL value to be assigned to the attribute.
  8. +
  9. Let W be the result of converting V to an ECMAScript value.
  10. +
  11. Let script be the callback context associated with V.
  12. +
  13. Push script on to the stack of incumbent scripts. [HTML]
  14. +
  15. Try running the following step: +
      +
    1. Invoke the [[Put]] method of O with property name P and value W.
    2. +
    + And then, whether or not an exception was thrown: +
      +
    1. Pop script off the stack of incumbent scripts.
    2. +
    3. If an exception was thrown, end these steps, and allow it to propagate.
    4. +
    +
  16. +
+
+ +
+

4.9 Invoking callback functions

+ +

+ An ECMAScript callable object that is being + used as a callback function value is + called in a manner similar to how operations + on user objects are called (as + described in the previous section). The callable object + is called with a list of values + arg0..n−1, + each of which is either an IDL value or the special value “missing” (representing + a missing optional argument), by + following the algorithm below. By default, the callback this value + when invoking a callback function + is undefined, unless overridden by other specifications. +

+
    +
  1. Try running the following steps: +
      +
    1. Let V be the IDL callback function type value.
    2. +
    3. Let F be the ECMAScript object corresponding to V.
    4. +
    5. Let R be an uninitialized variable.
    6. +
    7. If IsCallable(F) is false, then set R to the value undefined.
    8. +
    9. Otherwise, +
        +
      1. Initialize values to be an empty list of ECMAScript values.
      2. +
      3. Initialize count to 0.
      4. +
      5. Initialize i to 0.
      6. +
      7. While i < n: +
          +
        1. If argi is the special value “missing”, then append to values the ECMAScript undefined value.
        2. +
        3. Otherwise, argi is an IDL value. Append to values the result of + converting argi to an ECMAScript value, + and set count to i + 1.
        4. +
        5. Set i to i + 1.
        6. +
        +
      8. +
      9. Truncate values to have length count.
      10. +
      11. Let script be the callback context associated with V.
      12. +
      13. Push script on to the stack of incumbent scripts. [HTML]
      14. +
      15. Try running the following step: +
          +
        1. Set R to the result of invoking the [[Call]] method of F, providing the callback this value as the this value and values as the argument values.
        2. +
        + And then, whether or not an exception was thrown: +
          +
        1. Pop script off the stack of incumbent scripts.
        2. +
        3. If an exception was thrown, end these steps, and allow it to propagate.
        4. +
        +
      16. +
      +
    10. +
    11. If the callback function’s return type is void, return.
    12. +
    13. + Return the result of converting + R to an IDL value of the same type as the callback function’s return type. +
    14. +
    + And then, if an exception was thrown: +
      +
    1. If the callback function has a return type that is a promise type, then: +
        +
      1. Let reject be the initial value of %Promise%.reject.
      2. +
      3. Return the result of calling reject with %Promise% as the this object and the exception as the single argument value.
      4. +
      +
    2. +
    3. Otherwise, end these steps and allow the exception to propagate.
    4. +
    +
  2. +
+
+ +
+

4.10 Exceptions

+ +

+ There MUST exist a property on the ECMAScript global object + whose name is “DOMException” and value is an object called the + DOMException constructor object, + which provides access to legacy DOMException code constants and allows construction of + DOMException instances. + The property has the attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true }. +

+ +
+

4.10.1 DOMException constructor object

+ +

+ The DOMException constructor object MUST be a function object + but with a [[Prototype]] value of %Error% ( + [ECMA-262] + , section 6.1.7.4). +

+

+ For every legacy code listed in the error names table, + there MUST be a property on the DOMException constructor object + whose name and value are as indicated in the table. The property has + attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }. +

+

+ The DOMException constructor object MUST also have a property named + “prototype” with attributes + { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: false } + whose value is an object called the DOMException prototype object. + This object also provides access to the legacy code values. +

+ +
+
4.10.1.1 DOMException(message, name)
+ +

When the DOMException function is called with arguments message and name, the following steps are taken:

+ +
    +
  1. Let F be the active function object.
  2. +
  3. If NewTarget is undefined, let newTarget be F, else let newTarget be NewTarget.
  4. +
  5. Let super be F.[[GetPrototypeOf]]().
  6. +
  7. ReturnIfAbrupt(super).
  8. +
  9. If IsConstructor(super) is false, throw a TypeError exception.
  10. +
  11. Let O be Construct(super, «message», newTarget).
  12. +
  13. If name is not undefined, then +
      +
    1. Let name be ToString(name).
    2. +
    3. Let status be DefinePropertyOrThrow(O, "name", PropertyDescriptor{[[Value]]: name, [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true}).
    4. +
    5. ReturnIfAbrupt(status).
    6. +
    7. Let code be the legacy code indicated in the error names table for error name name, or 0 if there is none.
    8. +
    9. Let status be DefinePropertyOrThrow(O, "code", PropertyDescriptor{[[Value]]: code, [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true}).
    10. +
    11. ReturnIfAbrupt(status).
    12. +
    +
  14. +
  15. Return O.
  16. +
+
+
+ +
+

4.10.2 DOMException prototype object

+ +

+ The DOMException prototype object MUST + have an internal [[Prototype]] property whose value is %ErrorPrototype% ( + [ECMA-262] + , section 6.1.7.4). +

+

+ The class string of the + DOMException prototype object + is “DOMExceptionPrototype”. +

+

+ There MUST be a property named “constructor” + on the DOMException prototype object with attributes + { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true } + and whose value is the DOMException constructor object. +

+

+ For every legacy code listed in the error names table, + there MUST be a property on the DOMException prototype object + whose name and value are as indicated in the table. The property has + attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }. +

+
+
+ +
+

4.11 Exception objects

+ +

+ Simple exceptions are represented + by native ECMAScript objects of the corresponding type. +

+

+ DOMExceptions are represented by + platform objects that inherit from + the DOMException prototype object. +

+

+ Every platform object representing a DOMException in ECMAScript is associated with a global environment, just + as the initial objects are. + When an exception object is created by calling the DOMException constructor object, + either normally or as part of a new expression, then the global environment + the newly created object is associated with MUST be the same as for the + DOMException constructor object itself. +

+

+ The value of the internal [[Prototype]] + property of a DOMException + object MUST be the DOMException prototype object + from the global environment the exception object is associated with. +

+

+ The class string + of a DOMException object + MUST be “DOMException”. +

+
Note
+

The intention is for DOMException objects to be just like the other + various native Error objects that the + ECMAScript specification defines, apart from responding differently + to being passed to Object.prototype.toString and it having a “code” property. + If an implementation places non-standard properties on native + Error objects, exposing for example + stack traces or error line numbers, then these ought to be exposed + on exception objects too.

+
+
+ +
+

4.12 Creating and throwing exceptions

+ +

+ First, we define the current global environment + as the result of running the following algorithm: +

+
    +
  1. + Let F be the Function object used + as the this value in the top-most call + on the ECMAScript call stack where F corresponds to an IDL + attribute, + operation, + indexed property, + named property, + constructor, + named constructor, + stringifier, + exception constructor or + exception field getter. +
  2. +
  3. + If F corresponds to an attribute, operation or stringifier, then return + the global environment associated with the + interface that definition appears on. +
  4. +
  5. + Otherwise, if F corresponds to an indexed or named property, then return + the global environment associated with the interface that + the indexed or named property getter, setter or deleter was defined on. +
  6. +
  7. + Otherwise, if F is a named constructor for an interface, or is + an interface object for an + interface that is a constructor, then return the global environment + associated with that interface. +
  8. +
  9. + Otherwise, if F is an exception field getter, then return + the global environment associated with the exception on which the + exception field was defined. +
  10. +
  11. + Otherwise, F is an exception interface object that is a constructor. + Return the global environment associated with that exception. +
  12. +
+

+ When a simple exception or + DOMException + E is to be created, + with error name N and + optional user agent-defined message M, + the following steps MUST be followed: +

+
    +
  1. If M was not specified, let M be undefined. Otherwise, let it be the result of converting M to a String value.
  2. +
  3. Let N be the result of converting N to a String value.
  4. +
  5. Let args be a list of ECMAScript values determined based on the type of E: +
    +
    E is DOMException
    +
    args is (undefined, N).
    +
    E is a simple exception
    +
    args is (M)
    +
    +
  6. +
  7. Let G be the current global environment.
  8. +
  9. Let X be an object determined based on the type of E: +
    +
    E is DOMException
    +
    X is the DOMException constructor object + from the global environment G.
    +
    E is a simple exception
    +
    X is the constructor for the corresponding ECMAScript error + from the global environment G.
    +
    +
  10. +
  11. Let O be the result of calling X as a function + with args as the argument list.
  12. +
  13. Return O.
  14. +
+

+ When a simple exception or + DOMException + E is to be thrown, + with error name N and + optional user agent-defined message M, + the following steps MUST be followed: +

+
    +
  1. Let O be the result of creating + the specified exception E with error name N and + optional user agent-defined message M.
  2. +
  3. Throw O.
  4. +
+
Note
+

+ The above algorithms do not restrict platform objects representing exceptions + propagating out of a Function to be + ones that are associated with the global environment + where that Function object originated. + For example, consider the IDL: +

+
IDL
interface A {
+
+  /**
+   * Calls computeSquareRoot on m, passing x as its argument.
+   */
+  double doComputation(MathUtils m, double x);
+};
+
+interface MathUtils {
+  /**
+   * If x is negative, throws a NotSupportedError.  Otherwise, returns
+   * the square root of x.
+   */
+  double computeSquareRoot(double x);
+};
+

+ If we pass a MathUtils object from + a different global environment to doComputation, then the exception + thrown will be from that global environment: +

+
ECMAScript
var a = getA();                           // An A object from this global environment.
+var m = otherWindow.getMathUtils();       // A MathUtils object from a different global environment.
+
+a instanceof Object;                      // Evaluates to true.
+m instanceof Object;                      // Evaluates to false.
+m instanceof otherWindow.Object;          // Evaluates to true.
+
+try {
+  a.doComputation(m, -1);
+} catch (e) {
+  e instanceof DOMException;              // Evaluates to false.
+  e instanceof otherWindow.DOMException;  // Evaluates to true.
+}
+
+

+ Any requirements in this document to throw an instance of an ECMAScript built-in + Error MUST use + the built-in from the current global environment. +

+
+ +
+

4.13 Handling exceptions

+ +

+ None of the algorithms or processing requirements in the + ECMAScript language binding catch ECMAScript exceptions. Whenever + an ECMAScript Function is invoked due + to requirements in this section and that Function + ends due to an exception being thrown, that exception + MUST propagate to the caller, and if + not caught there, to its caller, and so on. +

+
Example
+

+ The following IDL fragment + defines two interfaces. + The valueOf attribute on ExceptionThrower + is defined to throw an exception whenever an attempt is made + to get its value. +

+
IDL
interface Dahut {
+  attribute DOMString type;
+};
+
+interface ExceptionThrower {
+  // This attribute always throws a NotSupportedError and never returns a value.
+  attribute long valueOf;
+};
+

+ Assuming an ECMAScript implementation supporting this interface, + the following code demonstrates how exceptions are handled: +

+
ECMAScript
var d = getDahut();              // Obtain an instance of Dahut.
+var et = getExceptionThrower();  // Obtain an instance of ExceptionThrower.
+
+try {
+  d.type = { toString: function() { throw "abc"; } };
+} catch (e) {
+  // The string "abc" is caught here, since as part of the conversion
+  // from the native object to a string, the anonymous function
+  // was invoked, and none of the [[DefaultValue]], ToPrimitive or
+  // ToString algorithms are defined to catch the exception.
+}
+
+try {
+  d.type = { toString: { } };
+} catch (e) {
+  // An exception is caught here, since an attempt is made to invoke
+  // [[Call]] on the native object that is the value of toString
+  // property.
+}
+
+d.type = et;
+// An uncaught NotSupportedError DOMException is thrown here, since the
+// [[DefaultValue]] algorithm attempts to get the value of the
+// "valueOf" property on the ExceptionThrower object.  The exception
+// propagates out of this block of code.
+
+
+
+ +
+

5. Common definitions

+ +

+ This section specifies some common definitions that all + conforming implementations + MUST support. +

+ +
+

5.1 ArrayBufferView

+ +
IDL
typedef (Int8Array or Int16Array or Int32Array or
+         Uint8Array or Uint16Array or Uint32Array or Uint8ClampedArray or
+         Float32Array or Float64Array or DataView) ArrayBufferView;
+

+ The ArrayBufferView typedef is used to represent + objects that provide a view on to an ArrayBuffer. +

+
+ +
+

5.2 BufferSource

+ +
IDL
typedef (ArrayBufferView or ArrayBuffer) BufferSource;
+

+ The BufferSource typedef is used to represent objects + that are either themselves an ArrayBuffer or which + provide a view on to an ArrayBuffer. +

+
+ +
+

5.3 DOMTimeStamp

+ +
IDL
typedef unsigned long long DOMTimeStamp;
+

+ The DOMTimeStamp type is used for representing + a number of milliseconds, either as an absolute time (relative to some epoch) + or as a relative amount of time. Specifications that use this type will need + to define how the number of milliseconds is to be interpreted. +

+
+ +
+

5.4 Function

+ +
IDL
callback Function = any (any... arguments);
+

+ The Function callback function + type is used for representing function values with no restriction on what arguments + are passed to it or what kind of value is returned from it. +

+
+ +
+

5.5 VoidFunction

+ +
IDL
callback VoidFunction = void ();
+

+ The VoidFunction callback function + type is used for representing function values that take no arguments and do not + return any value. +

+
+
+ +
+

6. Extensibility

+ +

This section is informative.

+ +

+ Extensions to language binding requirements can be specified + using extended attributes + that do not conflict with those defined in this document. Extensions for + private, project-specific use should not be included in + IDL fragments + appearing in other specifications. It is recommended that extensions + that are required for use in other specifications be coordinated + with the group responsible for work on Web IDL, which + at the time of writing is the + W3C Web Platform Working Group, + for possible inclusion in a future version of this document. +

+

+ Extensions to any other aspect of the IDL language are + strongly discouraged. +

+
+ +
+

7. Referencing this specification

+ +

This section is informative.

+ +

+ It is expected that other specifications that define Web platform interfaces + using one or more IDL fragments + will reference this specification. It is suggested + that those specifications include a sentence such as the following, + to indicate that the IDL is to be interpreted as described in this + specification: +

+
+

+ The IDL fragment in Appendix A of this specification must, in conjunction + with the IDL fragments defined in this specification's normative references, + be interpreted as required for conforming sets of IDL fragments, as described in the + “Web IDL” specification. [WEBIDL] +

+
+

+ In addition, it is suggested that the conformance class for user + agents in referencing specifications be linked to the + conforming + implementation class from this specification: +

+
+

+ A conforming FooML user agent must also be a + conforming implementation of the IDL fragment in Appendix A + of this specification, as described in the + “Web IDL” specification. [WEBIDL] +

+
+
+ +
+

8. Acknowledgements

+ +

This section is informative.

+ +

+ The editor would like to thank the following people for contributing + to this specification: + Glenn Adams, + David Andersson, + L. David Baron, + Art Barstow, + Nils Barth, + Robin Berjon, + David Bruant, + Jan-Ivar Bruaroey, + Marcos Cáceres, + Giovanni Campagna, + Domenic Denicola, + Michael Dyck, + Brendan Eich, + João Eiras, + Gorm Haug Eriksen, + Sigbjorn Finne, + David Flanagan, + Dimitry Golubovsky, + James Graham, + Aryeh Gregor, + Kartikaya Gupta, + Marcin Hanclik, + Jed Hartman, + Stefan Haustein, + Dominique Hazaël-Massieux, + Ian Hickson, + Björn Höhrmann, + Kyle Huey, + Lachlan Hunt, + Oliver Hunt, + Jim Jewett, + Wolfgang Keller, + Anne van Kesteren, + Olav Junker Kjær, + Magnus Kristiansen, + Travis Leithead, + Jim Ley, + Kevin Lindsey, + Jens Lindström, + Peter Linss, + 呂康豪 (Kang-Hao Lu), + Kyle Machulis, + Mark Miller, + Ms2ger, + Andrew Oakley, + 岡坂 史紀 (Shiki Okasaka), + Jason Orendorff, + Olli Pettay, + Simon Pieters, + Andrei Popescu, + François Remy, + Tim Renouf, + Alex Russell, + Takashi Sakamoto, + Doug Schepers, + Jonas Sicking, + Garrett Smith, + Geoffrey Sneddon, + Jungkee Song, + Josh Soref, + Maciej Stachowiak, + Anton Tayanovskyy, + Peter Van der Beken, + Jeff Walden, + Allen Wirfs-Brock, + Jeffrey Yasskin and + Collin Xu. +

+

+ Special thanks also go to Sam Weinig for maintaining this document + while the editor was unavailable to do so. +

+
+ + +

A. IDL grammar

+ This section defines an LL(1) grammar whose start symbol, + Definitions, matches an + entire IDL fragment. +

+ Each production in the grammar has on its right hand side either a + non-zero sequence of terminal and non-terminal symbols, or an + epsilon (ε) which indicates no symbols. Symbols that begin with + an uppercase letter are non-terminal symbols. Symbols within quotes + are terminal symbols that are matched with the exact text between + the quotes. Symbols that begin with a lowercase letter are terminal + symbols that are matched by the regular expressions (using Perl 5 regular + expression syntax [PERLRE]) as follows: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
integer=/-?([1-9][0-9]*|0[Xx][0-9A-Fa-f]+|0[0-7]*)/
float=/-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)/
identifier=/_?[A-Za-z][0-9A-Z_a-z-]*/
string=/"[^"]*"/
whitespace=/[\t\n\r ]+/
comment=/\/\/.*|\/\*(.|\n)*?\*\//
other=/[^\t\n\r 0-9A-Za-z]/

+ The tokenizer operates on a sequence of Unicode characters + [UNICODE]. + When tokenizing, the longest possible match MUST be used. For example, if the input + text is “a1”, it is tokenized as a single identifier, + and not as a separate identifier and integer. + If the longest possible match could match one of the above named terminal symbols or + one of the quoted terminal symbols from the grammar, it MUST be tokenized as the quoted + terminal symbol. Thus, the input text “long” is tokenized as the quoted terminal symbol + "long" rather than an identifier called “long”, + and “.” is tokenized as the quoted terminal symbol + "." rather than an other. +

+ The IDL syntax is case sensitive, both for the quoted terminal symbols + used in the grammar and the values used for + identifier terminals. Thus, for + example, the input text “Const” is tokenized as + an identifier rather than the quoted + terminal symbol "const", an + interface with + identifier + “A” is distinct from one named “a”, and an + extended attribute + [constructor] will not be recognized as + the [Constructor] + extended attribute. +

+ Implicitly, any number of whitespace and + comment terminals are allowed between every other terminal + in the input text being parsed. Such whitespace and + comment terminals are ignored while parsing. +

+ The following LL(1) grammar, starting with Definitions, + matches an IDL fragment: +

[1]DefinitionsExtendedAttributeList Definition Definitions
 | + ε
[2]DefinitionCallbackOrInterface
 | + Partial
 | + Dictionary
 | + Enum
 | + Typedef
 | + ImplementsStatement
[3]CallbackOrInterface"callback" CallbackRestOrInterface
 | + Interface
[4]CallbackRestOrInterfaceCallbackRest
 | + Interface
[5]Interface"interface" identifier Inheritance "{" InterfaceMembers "}" ";"
[6]Partial"partial" PartialDefinition
[7]PartialDefinitionPartialInterface
 | + PartialDictionary
[8]PartialInterface"interface" identifier "{" InterfaceMembers "}" ";"
[9]InterfaceMembersExtendedAttributeList InterfaceMember InterfaceMembers
 | + ε
[10]InterfaceMemberConst
 | + Operation
 | + Serializer
 | + Stringifier
 | + StaticMember
 | + Iterable
 | + ReadOnlyMember
 | + ReadWriteAttribute
[11]Dictionary"dictionary" identifier Inheritance "{" DictionaryMembers "}" ";"
[12]DictionaryMembersExtendedAttributeList DictionaryMember DictionaryMembers
 | + ε
[13]DictionaryMemberRequired Type identifier Default ";"
[14]Required"required"
 | + ε
[15]PartialDictionary"dictionary" identifier "{" DictionaryMembers "}" ";"
[16]Default"=" DefaultValue
 | + ε
[17]DefaultValueConstValue
 | + string
 | + "[" "]"
[18]Inheritance":" identifier
 | + ε
[19]Enum"enum" identifier "{" EnumValueList "}" ";"
[20]EnumValueListstring EnumValueListComma
[21]EnumValueListComma"," EnumValueListString
 | + ε
[22]EnumValueListStringstring EnumValueListComma
 | + ε
[23]CallbackRestidentifier "=" ReturnType "(" ArgumentList ")" ";"
[24]Typedef"typedef" Type identifier ";"
[25]ImplementsStatementidentifier "implements" identifier ";"
[26]Const"const" ConstType identifier "=" ConstValue ";"
[27]ConstValueBooleanLiteral
 | + FloatLiteral
 | + integer
 | + "null"
[28]BooleanLiteral"true"
 | + "false"
[29]FloatLiteralfloat
 | + "-Infinity"
 | + "Infinity"
 | + "NaN"
[30]Serializer"serializer" SerializerRest
[31]SerializerRestOperationRest
 | + "=" SerializationPattern ";"
 | + ";"
[32]SerializationPattern"{" SerializationPatternMap "}"
 | + "[" SerializationPatternList "]"
 | + identifier
[33]SerializationPatternMap"getter"
 | + "inherit" Identifiers
 | + identifier Identifiers
 | + ε
[34]SerializationPatternList"getter"
 | + identifier Identifiers
 | + ε
[35]Stringifier"stringifier" StringifierRest
[36]StringifierRestReadOnly AttributeRest
 | + ReturnType OperationRest
 | + ";"
[37]StaticMember"static" StaticMemberRest
[38]StaticMemberRestReadOnly AttributeRest
 | + ReturnType OperationRest
[39]ReadOnlyMember"readonly" ReadOnlyMemberRest
[40]ReadOnlyMemberRestAttributeRest
[41]ReadWriteAttribute"inherit" ReadOnly AttributeRest
 | + AttributeRest
[42]AttributeRest"attribute" Type AttributeName ";"
[43]AttributeNameAttributeNameKeyword
 | + identifier
[44]AttributeNameKeyword"required"
[45]Inherit"inherit"
 | + ε
[46]ReadOnly"readonly"
 | + ε
[47]OperationReturnType OperationRest
 | + SpecialOperation
[48]SpecialOperationSpecial Specials ReturnType OperationRest
[49]SpecialsSpecial Specials
 | + ε
[50]Special"getter"
 | + "setter"
 | + "deleter"
 | + "legacycaller"
[51]OperationRestOptionalIdentifier "(" ArgumentList ")" ";"
[52]OptionalIdentifieridentifier
 | + ε
[53]ArgumentListArgument Arguments
 | + ε
[54]Arguments"," Argument Arguments
 | + ε
[55]ArgumentExtendedAttributeList OptionalOrRequiredArgument
[56]OptionalOrRequiredArgument"optional" Type ArgumentName Default
 | + Type Ellipsis ArgumentName
[57]ArgumentNameArgumentNameKeyword
 | + identifier
[58]Ellipsis"..."
 | + ε
[59]Iterable"iterable" "<" Type OptionalType ">" ";"
[60]OptionalType"," Type
 | + ε
[65]ExtendedAttributeList"[" ExtendedAttribute ExtendedAttributes "]"
 | + ε
[66]ExtendedAttributes"," ExtendedAttribute ExtendedAttributes
 | + ε
[67]ExtendedAttribute + "(" ExtendedAttributeInner ")" ExtendedAttributeRest +
 | + "[" ExtendedAttributeInner "]" ExtendedAttributeRest +
 | + "{" ExtendedAttributeInner "}" ExtendedAttributeRest +
 | + Other ExtendedAttributeRest +
[68]ExtendedAttributeRestExtendedAttribute
 | + ε
[69]ExtendedAttributeInner + "(" ExtendedAttributeInner ")" ExtendedAttributeInner +
 | + "[" ExtendedAttributeInner "]" ExtendedAttributeInner +
 | + "{" ExtendedAttributeInner "}" ExtendedAttributeInner +
 | + OtherOrComma ExtendedAttributeInner +
 | + ε +
[70]Other + integer
 | + float
 | + identifier
 | + string
 | + other +
 | + "-"
 | + "-Infinity"
 | + "."
 | + "..."
 | + ":"
 | + ";"
 | + "<"
 | + "="
 | + ">"
 | + "?" +
 | + "ByteString"
 | + "DOMString"
 | + "Infinity"
 | + "NaN"
 | + "USVString"
 | + "any"
 | + "boolean"
 | + "byte"
 | + "double"
 | + "false"
 | + "float" +
 | + "long"
 | + "null"
 | + "object"
 | + "octet"
 | + "or"
 | + "optional"
 | + "sequence" +
 | + "short"
 | + "true"
 | + "unsigned"
 | + "void" +
 | + ArgumentNameKeyword +
 | + BufferRelatedType +
[71]ArgumentNameKeyword + "attribute"
 | + "callback"
 | + "const"
 | + "deleter"
 | + "dictionary" +
 | + "enum"
 | + "getter"
 | + "implements"
 | + "inherit"
 | + "interface"
 | + "iterable" +
 | + "legacycaller"
 | + "partial"
 | + "required"
 | + "serializer"
 | + "setter"
 | + "static"
 | + "stringifier"
 | + "typedef" +
 | + "unrestricted" +
[72]OtherOrCommaOther
 | + ","
[73]TypeSingleType
 | + UnionType Null
[74]SingleTypeNonAnyType
 | + "any"
[75]UnionType"(" UnionMemberType "or" UnionMemberType UnionMemberTypes ")"
[76]UnionMemberTypeNonAnyType
 | + UnionType Null
[77]UnionMemberTypes"or" UnionMemberType UnionMemberTypes
 | + ε
[78]NonAnyTypePrimitiveType Null
 | + PromiseType Null
 | + "ByteString" Null
 | + "DOMString" Null
 | + "USVString" Null
 | + identifier Null
 | + "sequence" "<" Type ">" Null
 | + "object" Null
 | + "Error" Null
 | + "DOMException" Null
 | + BufferRelatedType Null
[79]BufferRelatedType"ArrayBuffer"
 | + "DataView"
 | + "Int8Array"
 | + "Int16Array"
 | + "Int32Array"
 | + "Uint8Array"
 | + "Uint16Array"
 | + "Uint32Array"
 | + "Uint8ClampedArray"
 | + "Float32Array"
 | + "Float64Array"
[80]ConstTypePrimitiveType Null
 | + identifier Null
[81]PrimitiveTypeUnsignedIntegerType
 | + UnrestrictedFloatType
 | + "boolean"
 | + "byte"
 | + "octet"
[82]UnrestrictedFloatType"unrestricted" FloatType
 | + FloatType
[83]FloatType"float"
 | + "double"
[84]UnsignedIntegerType"unsigned" IntegerType
 | + IntegerType
[85]IntegerType"short"
 | + "long" OptionalLong
[86]OptionalLong"long"
 | + ε
[87]PromiseType"Promise" "<" ReturnType ">"
[88]Null"?"
 | + ε
[89]ReturnTypeType
 | + "void"
[90]IdentifierListidentifier Identifiers
[91]Identifiers"," identifier Identifiers
 | + ε
[92]ExtendedAttributeNoArgsidentifier
[93]ExtendedAttributeArgListidentifier "(" ArgumentList ")"
[94]ExtendedAttributeIdentidentifier "=" identifier
[95]ExtendedAttributeIdentListidentifier "=" "(" IdentifierList ")"
[96]ExtendedAttributeNamedArgListidentifier "=" identifier "(" ArgumentList ")"
Note
+

+ The Other + non-terminal matches any single terminal symbol except for + "(", ")", + "[", "]", + "{", "}" + and ",". +

+

+ While the ExtendedAttribute + non-terminal matches any non-empty sequence of terminal symbols (as long as any + parentheses, square brackets or braces are balanced, and the + "," token appears only within those balanced brackets), + only a subset of those + possible sequences are used by the extended attributes + defined in this specification — see + section 3.11 + for the syntaxes that are used by these extended attributes. +

+ + +

B. References

B.1 Normative references

[ECMA-262]
Ecma International. ECMAScript Language Specification. URL: https://tc39.github.io/ecma262/ +
[IEEE-754]
ANSI/IEEE. IEEE Standard for Floating-Point Arithmetic. 03 September 2008. URL: http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=4610935 +
[PERLRE]
The Perl Foundation. Perl Regular Expressions (Perl 5.8.8). January 2006. URL: http://search.cpan.org/dist/perl/pod/perlre.pod +
[RFC2119]
S. Bradner. IETF. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 +
[RFC2781]
P. Hoffman; F. Yergeau. IETF. UTF-16, an encoding of ISO 10646. February 2000. Informational. URL: https://tools.ietf.org/html/rfc2781 +
[RFC3629]
F. Yergeau. IETF. UTF-8, a transformation format of ISO 10646. November 2003. Internet Standard. URL: https://tools.ietf.org/html/rfc3629 +
[TYPEDARRAYS]
V. Vukicevic; K. Russell. The Khronos Group. Typed Array Specification. 8 February 2011. URL: https://www.khronos.org/registry/typedarray/specs/1.0/ +
[UNICODE]
The Unicode Consortium. The Unicode Standard. URL: http://www.unicode.org/versions/latest/ +

B.2 Informative references

[DOM]
Anne van Kesteren; Aryeh Gregor; Ms2ger; Alex Russell; Robin Berjon. W3C. W3C DOM4. 19 November 2015. W3C Recommendation. URL: http://www.w3.org/TR/dom/ +
[DOM3CORE]
Arnaud Le Hors; Philippe Le Hégaret; Lauren Wood; Gavin Nicol; Jonathan Robie; Mike Champion; Steven B Byrne et al. W3C. Document Object Model (DOM) Level 3 Core Specification. 7 April 2004. W3C Recommendation. URL: http://www.w3.org/TR/DOM-Level-3-Core/ +
[HTML]
Ian Hickson. WHATWG. HTML Standard. Living Standard. URL: https://html.spec.whatwg.org/multipage/ +
[HTML5]
Ian Hickson; Robin Berjon; Steve Faulkner; Travis Leithead; Erika Doyle Navara; Edward O'Connor; Silvia Pfeiffer. W3C. HTML5. 28 October 2014. W3C Recommendation. URL: http://www.w3.org/TR/html5/ +
[OMGIDL]
Object Management Group. CORBA 3.1 – OMG IDL Syntax and Semantics chapter. January 2008. URL: http://www.omg.org/cgi-bin/doc?formal/08-01-04.pdf +
[WEBIDL]
WebIDL. URL: https://heycam.github.io/webidl/ +
[XMLNS]
Tim Bray; Dave Hollander; Andrew Layman; Richard Tobin; Henry Thompson et al. W3C. Namespaces in XML 1.0 (Third Edition). 8 December 2009. W3C Recommendation. URL: http://www.w3.org/TR/xml-names +
diff --git a/test/docs/metadata/appmanifest.html b/test/docs/metadata/appmanifest.html new file mode 100644 index 000000000..cbce2389c --- /dev/null +++ b/test/docs/metadata/appmanifest.html @@ -0,0 +1,3475 @@ + + + + + + Web App Manifest + + + + + +

Abstract

+

+ This specification defines a JSON-based manifest file that provides + developers with a centralized place to put metadata associated with a + web application. This metadata includes, but is not limited to, the web + application's name, links to icons, as well as the preferred URL to + open when a user launches the web application. The manifest also allows + developers to declare a default orientation for their web application, + as well as providing the ability to set the display mode for the + application (e.g., in fullscreen). Additionally, the manifest allows a + developer to "scope" a web application to a URL. This restricts the + URLs to which the manifest is applied and provides a means to "deep + link" into a web application from other applications. +

+

+ Using this metadata, user agents can provide developers with means to + create user experiences that are more comparable to that of a native + application. +

+

+ To associate documents of a web application with a manifest, this + specification defines the manifest link type as a + declarative means for a document to be associated with a manifest. +

+

Status of This Document

+

+ This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the W3C technical reports index at http://www.w3.org/TR/. +

+ +
Warning
+

+ Implementors need to be aware that this specification is not stable. + However, aspects of this specification are shipping in at least one + browser (see links to implementation status at the top of this + document). Implementors who are not taking part in the + discussions will find the specification changing out from under them + in incompatible ways. Vendors interested in implementing + this specification before it eventually reaches the Candidate + Recommendation phase should subscribe to the repository + on GitHub and take part in the discussions. +

+
+ +

+ This document was published by the Web Platform Working Group as a Working Draft. + This document is intended to become a W3C Recommendation. + If you wish to make comments regarding this document, please send them to + public-webapps@w3.org + (subscribe, + archives). + + All comments are welcome. +

+

+ Publication as a Working Draft does not imply endorsement by the W3C + Membership. This is a draft document and may be updated, replaced or obsoleted by other + documents at any time. It is inappropriate to cite this document as other than work in + progress. +

+

+ This document was produced by + a group + operating under the + 5 February 2004 W3C Patent + Policy. + W3C maintains a public list of any patent + disclosures + made in connection with the deliverables of + the group; that page also includes + instructions for disclosing a patent. An individual who has actual knowledge of a patent + which the individual believes contains + Essential + Claim(s) must disclose the information in accordance with + section + 6 of the W3C Patent Policy. +

+

This document is governed by the 1 September 2015 W3C Process Document. +

+ +
+ +
+

1. + Usage Examples +

This section is non-normative.

+

+ This section shows how developers can make use of the various features + of this specification. +

+
+

1.1 + Example manifest +

This section is non-normative.

+

+ The following shows a typical manifest. +

+
Example 1: typical manifest
{
+  "lang": "en",
+  "name": "Super Racer 2000",
+  "short_name": "Racer2K",
+  "icons": [{
+        "src": "icon/lowres",
+        "sizes": "64x64",
+        "type": "image/webp"
+      }, {
+        "src": "icon/hd_small",
+        "sizes": "64x64"
+      }, {
+        "src": "icon/hd_hi",
+        "sizes": "128x128",
+        "density": 2
+      }],
+  "splash_screens": [{
+        "src": "splash/lowres",
+        "sizes": "320x240"
+      }, {
+        "src": "splash/hd_small",
+        "sizes": "1334x750"
+      }, {
+        "src": "splash/hd_hi",
+        "sizes": "1920x1080",
+        "density": 3
+      }],
+  "scope": "/racer/",
+  "start_url": "/racer/start.html",
+  "display": "fullscreen",
+  "orientation": "landscape",
+  "theme_color": "aliceblue",
+  "background_color": "red"
+}
+
+ +
+
+

2. + Installable web applications +

+

+ This document attempts to address the Use Cases and + Requirements for Installable Web Apps. +

+

+ A web application is installed if the + user agent has provided the end-user with a means of instantiating a + new top-level browsing context that has the manifest's members + applied to it. That is, the manifest's members, or their defaults, are + in effect on the top-level browsing context (see application + context). +

+

+ An example of installing would be a user agent that allows the + end-user to add a shortcut to a web application on their device's + homescreen (using the name and one of the icons found in the manifest). + Then, when the end-user launches a web application through this + explicit means, the manifest is applied to the browsing context prior + to the start URL being loaded. This gives the user agent time to + apply the relevant values of the manifest, possibly changing the + display mode and screen orientation of the web application. +

+

+ Alternatively, an application context can be launched through a deep + link (a URL that is within scope of the installed web application); + in which case, the manifest is applied and the deep link is loaded + within the context of the web application. +

+
+

2.1 + Authority of the manifest's metadata +

+

+ When a manifest is linked from a Document, it + indicates to the user agent that the metadata is + authoritative: that is, the user agent SHOULD use the + metadata of the manifest instead of the one in the + Document. However, in cases where metadata is missing, + or in error, a user agent MAY fallback to the Document + to find suitable replacements for missing manifest members (e.g., + using application-name in place of + short_name). +

+
+
+

2.2 + Installability signals +

This section is non-normative.

+

+ By design, this specification does not provide developers with an + explicit API to "install" a web application. Instead, a + manifest can serve as an installability signal to a user agent + that a web application can be installed. +

+

+ Examples of installability signals for a web application: +

+
    +
  • is associated with a manifest with at least a + name member and a suitable icon. +
  • +
  • is served over a secure network connection. +
  • +
  • has a sensible content security policy. +
  • +
  • is able to responsibly adapt to display on a variety of screen + sizes, catering for both mobile and desktop. +
  • +
  • is able to function without a network connection. +
  • +
  • is repeatedly used by the end-user over some extended period of + time. +
  • +
  • has been explicitly marked by the user as one that they value and + trust (e.g., by bookmarking or "starring" it). +
  • +
+

+ This list is not exhaustive and some installability signals + might not apply to all user agents. How a user agent makes use of + these installability signals to determine if a web application + can be installed is left to implementers. +

+
+
+ +
+

4. + Display modes +

+

+ A display mode represents how the web application is being + presented within the context of an OS (e.g., in fullscreen, etc.). + Display modes correspond to user interface (UI) metaphors and + functionality in use on a given platform. The UI conventions of the + display modes are purely advisory and implementers are free to + interpret them how they best see fit. +

+

+ Once a user agent applies a particular display mode to an + application context, it becomes the default display + mode for the top-level browsing context (i.e., it is used + as the display mode when the window is navigated). The user + agent MAY override the default display mode for security reasons + (e.g., the top-level browsing context is navigated to + another origin) and/or the user agent MAY provide the user with a means + of switching to another display mode. +

+

+ When the display member is missing, or if there is no + valid display member, the user agent uses the + browser display mode as the default display + mode. As such, the user agent is REQUIRED to support the + browser display mode. +

+

+ Each display mode, except browser, has a + fallback display mode, + which is the display mode that the user agent can try to use if + it doesn't support a particular display mode. If the user agent + does not support a fallback display mode, then it checks to see if + it can use that display mode's fallback display mode. + This creates a fallback chain, with the default display mode + (browser) being the last item in the chain. +

+
+

+ For example, Awesome Browser only supports the + minimal-ui and browser display modes, but a + developer declares that she wants fullscreen in the + manifest. In this case, the user agent will first check if it + supports fullscreen (it doesn't), so it falls back to + standalone (which it also doesn't support), and + ultimately falls back to minimal-ui. +

+
+

+ The display modes values and their corresponding fallback + display modes are as follows: +

+
+
+ fullscreen +
+
+ Opens the web application without any user agent chrome and takes up + the entirety of the available display area. +
+
+ The fallback display mode for fullscreen is + standalone. +
+
+ standalone +
+
+ Opens the web application to look and feel like a standalone native + application. This can include the application having a different + window, its own icon in the application launcher, etc. In this mode, + the user agent will exclude UI elements for controlling navigation, + but can include other UI elements such as a status bar. +
+
+ The fallback display mode for standalone is + minimal-ui. +
+
+ minimal-ui +
+
+ This mode is similar to fullscreen, but provides the end-user + with some means to access a minimal set of UI elements for + controlling navigation (i.e., back, forward, reload, and perhaps some + way of viewing the document's address). A user agent can include + other platform specific UI elements, such as "share" and "print" + buttons or whatever is customary on the platform and user agent. +
+
+ The fallback display mode for minimal-ui is + browser. +
+
+ browser +
+
+ Opens the web application using the platform-specific convention for + opening hyperlinks in the user agent (e.g., in a browser tab or a new + window). +
+
+ The browser display mode doesn't have a + fallback display mode (conforming user agents are required to + support the browser display mode). +
+
+
Note

+ The fullscreen display mode is orthogonal to, and works + independently of, the [WHATWG-FULLSCREEN] API. The + fullscreen display mode affects the + fullscreen state of the browser window, while the [WHATWG-FULLSCREEN] + API operates on an element contained within the viewport. As such, a + web application can have its display mode set to + fullscreen, while + document.fullScreenElement returns null, and + fullscreenEnabled returns false. +

+
+

4.1 + The 'display-mode' media feature +

+

+ The display-mode media feature represents, + via a CSS media query [MEDIAQ], the display mode of the web + application. This media feature applies to the top-level browsing + context and any child browsing contexts. Child browsing contexts + reflect the display mode of the top-level browsing + context. +

+

+ A user agent MUST expose the 'display-mode' media + feature irrespective of whether a manifest is being applied to a + browsing context. For example, if the end-user puts the whole user + agent into fullscreen, then the user agent would reflect this change + to CSS and scripts via the 'display-mode' media feature. +

+
Note
+

+ Please note that the fullscreen display mode is + not directly related to the CSS :fullscreen + pseudo-class specified in the [WHATWG-FULLSCREEN] API. The + :fullscreen pseudo-class matches exclusively when a + [HTML] element is put into the fullscreen element stack. + However, a side effect of calling the + requestFullscreen() method on an element using the + [WHATWG-FULLSCREEN] API is that the browser window can enter a + fullscreen mode at the OS-level. In such a case, both + :fullscreen and (display-mode: + fullscreen) will match. +

+

+ On some platforms, it is possible for a user to put a browser + window into fullscreen without the aid of the [WHATWG-FULLSCREEN] + API. When this happens, the :fullscreen pseudo-class + will not match, but (display-mode: fullscreen) will + match. This is exemplified in CSS code below. +

+
Example 3
/* applies when the window is fullscreen */
+@media all and (display-mode: fullscreen) {
+    ...
+}
+
+/* applies when an element goes fullscreen */
+#game:fullscreen{
+    ...
+}
+
+
+
+ Value: +
+
+ fullscreen | standalone | minimal-ui | + browser +
+
+ Applies to: +
+
+ visual media types +
+
+ Accepts min/max prefixes: +
+
+ No +
+
+

+ A user agent MUST reflect the applied display mode of the web + application via a CSS media query [MEDIAQ]. +

+
+

4.1.1 + Examples +

+

+ An example in CSS: +

+
Example 4
@media all and (display-mode: minimal-ui) {
+  /* ... */
+}
+@media all and (display-mode: standalone) {
+  /* ... */
+}
+

+ Accessing the display-mode media feature in ECMAScript through + matchMedia() of [cssom-view]: +

+
Example 5
const standalone = matchMedia( '(display-mode: standalone)' );
+
+standalone.onchange = (e) => {
+  /* handle changes to display mode */
+}
+
+if (standalone.matches) {
+  /* do standalone things */
+}
+
+
+

4.1.2 + Security and privacy considerations +

+

+ The 'display-mode' media feature allows an origin + access to aspects of a user’s local computing environment and, + together with the display member, allows an origin + some measure of control over a user agent’s native UI: Through a + CSS media query, a script can know the display mode of a web + application. An attacker could, in such a case, exploit the fact + that an application is being displayed in fullscreen to mimic the + user interface of another application. +

+

+ Furthermore, by neglecting to define a scope member in the + manifest, it's possible to put a web application into a display + mode that persists cross-origin (for legacy reasons, this is + the default behavior). In cases where the navigation scope is + unbounded, it is left to the user agent to either stop applying the + manifest when a cross-origin navigation occurs or to show some sort + of security warning to the user. +

+
+
+
+
+

5. + Associating a resource with a manifest +

+

+ A resource is said to be associated with a manifest if the + resource representation, an HTML document, has a manifest link relationship. +

+
+

5.1 + Linking to a manifest +

+

+ The manifest keyword can be used with a [HTML] + link element. This keyword creates an external + resource link. +

+ + + + + + + + + + + + + + + + +
+ Link type + + Effect on... + + Brief description +
+ link + + a and area +
+ manifest + + External Resource + + not allowed + + Imports or links to a manifest. +
+

+ The media type for a manifest serves as the default media type + for resources associated with the manifest link type. +

+
Note

+ In cases where more than one link element with a + manifest link type appears in a Document, + the user agent uses the first link element in + tree order and ignores all subsequent link + elements with a manifest link type (even if the first + element was erroneous). See the steps for obtaining a + manifest. +

+

+ To obtain a manifest, the user agent MUST run the steps for + obtaining a manifest. The appropriate time to obtain the manifest + is left up to implementations. A user agent MAY opt to delay fetching + a manifest until after the document and its other resources have been + fully loaded (i.e., to not delay the availability of content and + scripts required by the document). +

+

+ A manifest is obtained and applied regardless of whether the + media attribute of the link + element matches the environment or not. +

+
+
+
+

6. + Manifest life-cycle +

+

+ This section defines algorithms for obtaining, + processing, and applying a + manifest, and gives recommendations to implementers on how to + react when the manifest is updated. +

+
+

6.1 + Obtaining a manifest +

+

+ The steps for obtaining a manifest are given by the + following algorithm. The algorithm, if successful, returns a + processed manifest and the manifest URL; otherwise, + it terminates prematurely and returns nothing. In the case of nothing + being returned, the user agent MUST ignore the manifest declaration. In + running these steps, a user agent MUST NOT delay the load + event. +

+
    +
  1. From the Document of the top-level browsing + context, let manifest link be the first + link element in tree order whose + rel attribute contains the token manifest. +
  2. +
  3. If manifest link is null, terminate this + algorithm. +
  4. +
  5. If manifest link's href attribute's value + is the empty string, then abort these steps. +
  6. +
  7. Let manifest URL be the result of parsing the + value of the href attribute, relative to the + element's base URL. If parsing fails, then abort these steps. +
  8. +
  9. Let request be a new [FETCH] request, whose + URL is manifest URL, and whose context is + "manifest". +
  10. +
  11. If the manifest link's crossOrigin + attribute's value is 'use-credentials', then set + request's credentials to 'include'. +
  12. +
  13. Await the result of performing a fetch with + request, letting response be the result. +
  14. +
  15. If response is a network error, terminate this + algorithm. +
  16. +
  17. Let manifest be the result of running the steps for + processing a manifest with response's body as the + text, manifest URL, and the URL that represents + the address of the top-level browsing context. +
  18. +
  19. Return manifest and manifest URL. +
  20. +
+
Note
+

+ Authors are encouraged to use the HTTP cache directives to + explicitly cache the manifest. For example, the following response + would cause a cached manifest to be used for 30 days from the time the + response is sent: +

+
HTTP/1.1 200 OK
+Cache-Control: max-age=2592000
+Content-Type: application/manifest+json
+
+{
+  "lang": "en",
+  "name": "Super Racer 2000",
+  "start_url": "/start.html",
+  "display": "fullscreen",
+  "orientation": "landscape"
+}
+
+
+

6.1.1 + Content security policy +

+

+ A user agent MUST support [CSP3]. +

+
+

+ The manifest-src and + default-src directives govern the origins + from which a user agent can fetch a manifest. As with + other directives, by default the manifest-src + directive is *, meaning that a user agent can, + [CORS] permitting, fetch the manifest cross-domain. Remote + origins (e.g., a CDN) wanting to host manifests + for various web applications will need to include the appropriate + [CORS] response header in their HTTP response (e.g., + Access-Control-Allow-Origin: https://example.com). +

+
+ manifest-src directive example illustrated +
Fig. 1 + For a [HTML] document, [CSP3]'s + manifest-src directive controls the sources + from which a [HTML] document can load a manifest. The + same CSP policy's img-src directive controls where + the icon's images can be fetched from. +
+
+
+
+
+
+

6.2 + Processing the manifest +

+

+ When instructed to issue a developer warning, the user + agent MAY report the conformance violation to the developer in a + user-agent-specific manner (e.g., report the problem in an error + console), or MAY ignore the error and do nothing. +

+

+ When instructed to ignore, the user agent MUST act as if + whatever manifest, member, or value caused the condition is absent. +

+

+ The following algorithm provides an extension point: other + specifications that add new members to the manifest are encouraged to + hook themselves into this specification at this point in the + algorithm. +

+
Note
+

+ The extension point is meant to help avoid issues related to + monkey patching. +

+
+

+ The steps for processing a manifest are given by the + following algorithm. The algorithm takes a text string as + an argument, which represents a manifest, and a URL + manifest URL, which represents the location of the + manifest, and a URL document URL. The output from + inputting a JSON document into this algorithm is a processed + manifest. +

+
    +
  1. Let parsed manifest be an empty object. +
  2. +
  3. Let manifest be the result of + parsing text. If + parsing throws an error: +
      +
    1. + Issue a developer warning with any details pertaining to + the JSON parsing error. +
    2. +
    3. Set manifest to be the result of + parsing the string "{}". +
    4. +
    +
  4. +
  5. If Type(manifest) is not "object": +
      +
    1. + Issue a developer warning that the manifest needs to be + an object. +
    2. +
    3. Set manifest to be the result of + parsing the string "{}". +
    4. +
    +
  6. +
  7. + Extension point: process any proprietary and/or other + supported members at this point in the algorithm. +
  8. +
  9. Let start URL of parsed manifest be the + result of running the steps for processing the + start_url member with manifest, + manifest URL, and document URL as arguments. +
  10. +
  11. Let display mode of parsed manifest be the + result of running the steps for processing the + display member with manifest as the + argument. +
  12. +
  13. Let orientation of parsed manifest be the + result of running the steps for processing the + orientation member with manifest and + display mode as arguments. +
  14. +
  15. Let name of parsed manifest be the result + of running the steps for processing the name + member with manifest as the argument. +
  16. +
  17. Let language of parsed manifest be the + result of running the steps for processing the lang + member with manifest as the argument. +
  18. +
  19. Let short name of parsed manifest be the + result of running the steps for processing the + short_name member with manifest as the + argument. +
  20. +
  21. Let icons of parsed manifest be the result + of running the steps for processing an array of images with + manifest, manifest URL, and "icons" as + arguments. +
  22. +
  23. Let splash screens of parsed manifest be + the result of running the steps for processing an array of + images with manifest, manifest URL, and + "splash_screens" as arguments. +
  24. +
  25. Let scope of parsed manifest be the result + of running the steps for processing the scope + member with manifest, manifest URL, + document URL, and start URL as arguments. +
  26. +
  27. Let related applications of parsed manifest + be the result of running the steps for processing the + related_applications member with manifest + as argument. +
  28. +
  29. Let prefer related applications of parsed + manifest be the result of running the steps for processing + the prefer_related_applications member with + manifest as argument. +
  30. +
  31. Let theme color of parsed manifest be the + result of running the steps for processing the + theme_color member with manifest as + argument. +
  32. +
  33. Let background_color of parsed manifest be + the result of running the steps for processing the + background_color member with manifest as + argument. +
  34. +
  35. Return parsed manifest. +
  36. +
+
+
+

6.3 + Applying the manifest +

+

+ A manifest is applied to a + top-level browsing context, meaning that the members of the + manifest are affecting the presentation or behavior of a + browsing context. +

+

+ A top-level browsing context that has a manifest applied to it + is referred to as an application context. +

+

+ If an application context is created as a result of the user + agent being asked to navigate to a deep link, the user + agent MUST immediately navigate to the deep link with + replacement enabled. Otherwise, when the application + context is created, the user agent MUST immediately + navigate to the start URL with replacement + enabled. +

+
+

+ Please note that the start URL is not necessarily the value + of the start_url member: the user or user agent + could have changed it when the application was added to home-screen + or otherwise bookmarked. +

+
+

+ The appropriate time to apply a manifest is when the + application context is created and before navigation to + the start URL begins. +

+
+
+

6.4 + Updating the manifest +

+
Issue 384: Updating is under/incorrectly specified

The spec says that the user agent may "periodically check if the contents of a manifest has been modified (e.g., by honoring HTTP cache directives associated with the manifest or by checking for updates after the web application has been launched)."

+ +

Firstly, being able to periodically check the contents of the manifest requires that the manifest URL does not change. If the manifest URL changes, there's no way to know that any new manifest corresponds to the same app as the old manifest and the only way for the app's metadata to be updated is for the user to re-install the app.

+ +

The removal of the same-origin restriction on manifest URLs was intended mainly to allow CDNs to host the manifest on a separate origin to the app, but the above assumes that CDNs are well behaved in that they never change the URL of the manifest, or as a minimum provide a redirect. @slightlylate assures me that CDNs these days are well behaved and do not change URLs of resources. Does anyone have any additional data on that?

+ +

So assuming the manifest URL does not change, or at least provides redirects, the user agent can "periodically check the contents" of the manifest to see whether something has been updated. Then the spec says that "in the event that the members of the manifest have been updated, as determined by running the steps for processing a manifest and seeing if anything has changed, the user agent MAY update the metadata corresponding to the web application ".

+ +

Now during the "steps for processing the manifest" the spec says that the start_url must be checked to be same origin with the document URL, that is the document the app was installed from. The same is true for the scope property. If the user agent is just "periodically checking the contents" of the manifest, does it need to also keep a record of the URL of the document the app was installed from, in order to make these checks? Note that this URL may be different for every installation as an app can be installed from any page of the app.

+ +

So assuming that CDNs behave well, the manifest URL doesn't change, and we've stored the document URL, we can then periodically check the contents of the manifest. Does this work for authenticated manifest URLs too? How does the user agent get the necessary credentials to fetch the manifest? Is the check only done by the user agent when the user logs in and uses the app?

+ +

It sounds like the implementation in Chrome may also be based on some additional assumptions regarding a registered Service Worker scope, is that the case? @mounirlamouri ?

+ +

If the assumptions above are in fact assumptions made by the spec, do we need to write something in the spec about the fact that user agents do not expect the manifest URL to change, and that therefore CDNs are expected to keep the same URL for the manifest or provide a redirect?

+

+ By using the manifest URL, a user agent MAY periodically check + if the contents of a manifest has been modified (e.g., by honoring + HTTP cache directives associated with the manifest or by checking for + updates after the web application has been launched). In the event + that the members of the manifest have been updated, as determined by + running the steps for processing a manifest and seeing if + anything has changed, the user agent MAY update the metadata + corresponding to the web application (e.g., by replacing the name, + icons, navigation scope, or whatever other data has been + changed). +

+

+ In addition, even if the manifest has not been modified, the user + agent MAY periodically check if resources referenced from a manifest + (e.g., the icons) have been modified by honoring HTTP cache + directives. If any resources have been modified, the user agent MAY + replace any stale resources. +

+

+ To avoid one application masquerading as another, it is RECOMMENDED + that users be made aware of any such updates using implementation or + platform specific conventions. +

+
+
+
+

7. + Manifest and its members +

+

+ A manifest is a JSON document that contains startup + parameters and application defaults for when a web application is + launched. A manifest consists of a top-level object that + contains zero or more members. Each of the members is defined below, + as well as how their values are processed. +

+

+ Every manifest has an associated manifest URL, which is the + [URL] from which the manifest was fetched. +

+
+

7.1 + lang member +

+

+ The lang member is a + language tag (string) that specifies the primary language for + the values of the manifest's name and + short_name members. +

+

+ A language tag is a string that matches the production of + a Language-Tag defined in the [BCP47] specifications + (see the IANA + Language Subtag Registry for an authoritative list of possible + values, see also the + Maintenance Agency for ISO 3166 country codes). That is, a + language range is composed of one or more subtags that are + delimited by a U+002D HYPHEN-MINUS ("-"). For example, the + 'en-AU' language range represents English as spoken in + Australia, and 'fr-CA' represents French as spoken in + Canada. Language tags that meet the validity criteria of [RFC5646] + section 2.2.9 that can be verified without reference to the IANA + Language Subtag Registry are considered structurally valid. +

+

+ The steps for processing the lang member are + given by the following algorithm. The algorithm takes a + manifest as an argument. This algorithm returns a string + or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of manifest + with argument "lang". +
  2. +
  3. If Type(value) is not "string": +
      +
    1. If Type(value) is not "undefined", + optionally issue a developer warning that the type is not + supported. +
    2. +
    3. Return undefined. +
    4. +
    +
  4. +
  5. Otherwise, Trim(value) and let tag + be the result. +
  6. +
  7. If calling IsStructurallyValidLanguageTag with + tag as the argument returns false, then: +
      +
    1. + Issue a developer warning that the value is + invalid. +
    2. +
    3. Return undefined. +
    4. +
    +
  8. +
  9. Otherwise, return the result of calling the + CanonicalizeLanguageTag abstract operation, passing + tag as the argument. +
  10. +
+
+
+

7.2 + name member +

+

+ The name member is a + string that represents the name of the web application as it + is usually displayed to the user (e.g., amongst a list of other + applications, or as a label for an icon). +

+

+ The steps for processing the name member are + given by the following algorithm. The algorithm takes a + manifest as an argument. This algorithm returns a string + or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of manifest + with argument "name". +
  2. +
  3. If Type(value) is not "string": +
      +
    1. If Type(value) is not "undefined", + optionally issue a developer warning that the type is not + supported. +
    2. +
    3. Return undefined. +
    4. +
    +
  4. +
  5. Otherwise, Trim(value) and return the result. +
  6. +
+
+
+

7.3 + short_name member +

+

+ The short_name member + is a string that represents a short version of the name of the + web application. It is intended to be used where there is + insufficient space to display the full name of the web application. +

+

+ The steps for processing the short_name + member are given by the following algorithm. The algorithm takes + a manifest as an argument. This algorithm returns a string + or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of manifest + with argument "short_name". +
  2. +
  3. If Type(value) is not "string": +
      +
    1. If Type(value) is not "undefined", + optionally issue a developer warning that the type is not + supported. +
    2. +
    3. Return undefined. +
    4. +
    +
  4. +
  5. Otherwise, Trim(value) and return the result. +
  6. +
+
+
+

7.4 + scope member +

+
Issue 380: Provide better examples of scope

People are confused by the lack of examples relating to scope:
+manifoldjs/ManifoldJS#42 (comment)

+

+ The scope member is a + string that represents the navigation scope of this web application's + application context. +

+

+ The steps for processing the scope member are + given by the following algorithm. The algorithm takes a + manifest manifest, a URL manifest + URL, a URL document URL, and a URL + start URL. This algorithm returns a URL or + undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of the manifest + with argument "scope". +
  2. +
  3. Let type be Type(value). +
  4. +
  5. If type is not "string" or value is the + empty string, then: +
      +
    1. If type is not "undefined", issue + a developer warning that the type is unsupported. +
    2. +
    3. Return undefined. +
    4. +
    +
  6. +
  7. Let scope URL be a new URL using + value as input and manifest URL as + the base URL. +
  8. +
  9. If scope URL is failure: + +
  10. +
  11. If scope URL is not same + origin as document URL: +
      +
    1. + Issue a developer warning that the scope + needs to be + same-origin as Document of the application + context. +
    2. +
    3. Return undefined. +
    4. +
    +
  12. +
  13. If start URL is not within scope of scope URL: +
      +
    1. + Issue a developer warning that the start URL is not + within scope of the navigation scope. +
    2. +
    3. Return undefined. +
    4. +
    +
  14. +
  15. Otherwise, return scope URL. +
  16. +
+
+
+

7.5 + splash_screens member +

+
Issue 372: A means to control how long a splash screen should remain on screen.

An app might want to show its splash screen for a minimum amount of time.

+

+ The splash_screens + member is an array of image objects that can + serve as a loading screen for the web application. A splash screen + indicates to the end user that a loading process is occurring (in + effect, that the web application is being prepared by the user agent + in the background). As the splash_screens member is an + array of image objects, developers can use unique image + objects definitions to target minimum screen resolutions and pixel + densities. +

+

+ This feature is primarily intended for user agents that take a + perceptible amount of time to self-initialize (e.g., take longer than + 200ms to be able to render content) - as such, it is OPTIONAL for a + user agent to display a splash screen while the web application is + being loaded. On launching a web application, if displaying a splash + screen, the user agent MUST queue a post-load task on the + Document of the start URL to remove the + splashscreen, or, alternatively can follow platform conventions for + how long a splash screen is displayed (e.g., a minimum of 1 second). +

+
Note

+ The splash_screens member is processed using the steps + for processing an array of images. +

+
+
+

7.6 + icons member +

+

+ The icons member is an + array of image objects that can serve as iconic + representations of the web application in various contexts. For + example, they can be used to represent the web application amongst a + list of other applications, or to integrate the web application with + an OS's task switcher and/or + system preferences. +

+
Note

+ The icons member is processed using the steps for + processing an array of images. +

+

+ If there are multiple equally appropriate icons in icons, + a user agent MUST use the last one declared in order at the time that + the user agent collected the list of icons. If the user + agent tries to use an icon but that icon is determined, upon closer + examination, to in fact be inappropriate (e.g. because its content + type is unsupported), then the user agent MUST try the + next-most-appropriate icon as determined by examining the image + object's members. +

+
+

+ In the following example, the developer has made the following + choices about the icons associated with the web application: +

+
    +
  • The developer has included two icons at the same size, but in + two different formats. One is explicitly marked as WebP through the + type member. If the user agent doesn't support WebP, + it falls back to the second icon of the same size (and density). + The media type of this icon can then be either determined via a + HTTP header, or can be sniffed by the user agent once the first few + bytes of the icon are received. +
  • +
  • The developer wants to use an SVG icon for devices with at + least 2dppx as the display density and only when the available + dimensions are at least 72px. She has found that the SVG file looks + too blurry at small sizes, even on high-density screens. To deal + with this problem, she's included an SVG icon that is only used + when the dimensions are at least 72px and the pixel density is at + least 2dppx. Otherwise, the user agent uses the ICO file + (hd_hi.ico), which includes a gamut of icons individually tailored + for small display sizes. +
  • +
+
{
+  "icons": [
+      {
+        "src": "icon/lowres.webp",
+        "sizes": "48x48",
+        "type": "image/webp"
+      },{
+        "src": "icon/lowres",
+        "sizes": "48x48"
+      },{
+        "src": "icon/hd_hi.ico",
+        "sizes": "72x72 96x96 128x128 256x256"
+      },{
+        "src": "icon/hd_hi.svg",
+        "sizes": "72x72",
+        "density": 2
+      }]
+ }
+
+
+
+
+

7.7 + display member +

+

+ The display member is a + string, whose value is one of display modes values. The + item represents the developer's preferred display mode for the + web application. When the member is missing or erroneous, the user + agent MUST use the fallback display mode. +

+

+ The steps for processing the display member + are given by the following algorithm. The algorithm takes a manifest + manifest as an argument, and returns a string. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of manifest + passing "display" as the argument. +
  2. +
  3. If Type(value) is not "string" or + Trim(value) is not part of the display modes + values: +
      +
    1. If Type(value) is not "undefined", issue + a developer warning that the type is unsupported. +
    2. +
    3. If value is not part of the display modes + values, issue a developer warning that the value is + unsupported. +
    4. +
    5. Return the fallback display mode's value. +
    6. +
    +
  4. +
  5. Otherwise, Trim(value) and set value + to be the result. +
  6. +
  7. If value is not a display mode that the user + agent supports, set value to value's + fallback display mode and re-run this step. +
  8. +
  9. Return value. +
  10. +
+
+
+

7.8 + orientation member +

+

+ The orientation + member is a string that serves as the default + orientation for all top-level browsing contexts of the web + application. The possible values are those of the + OrientationLockType enum defined in + [SCREEN-ORIENTATION]. +

+

+ If the user agent honors the value of the orientation + member as the default orientation, then that serves as the + default orientation for the life of the web application + (unless overridden by some other means at runtime). This means that + the user agent MUST return the orientation to the default + orientation any time the orientation is unlocked + [SCREEN-ORIENTATION] or the top-level browsing context is + navigated. +

+

+ Although the specification relies on the [SCREEN-ORIENTATION]'s + OrientationLockType, it is OPTIONAL for a user + agent to implement the [SCREEN-ORIENTATION] API. Supporting the + [SCREEN-ORIENTATION] API is, of course, RECOMMENDED. +

+

+ Certain UI/UX concerns and/or platform conventions will mean that + some screen orientations and display modes cannot be used + together . Which orientations and display modes cannot be used + together is left to the discretion of implementers. For example, for + some user agents, it might not make sense to change the default + orientation of an application while in browser + display mode. +

+
Note

+ Once the web application is running, other means can change the + orientation of a top-level browsing context (such as via + [SCREEN-ORIENTATION] API). +

+

+ The steps for processing the orientation + member are given by the following algorithm. The algorithm + takes a manifest manifest and display mode + display mode as arguments, and returns a string. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of manifest + with argument "orientation". +
  2. +
  3. If Type(value) is not "string": +
      +
    1. If Type(value) is not "undefined", issue + a developer warning that the type is not supported. +
    2. +
    3. Return the empty string. +
    4. +
    +
  4. +
  5. Otherwise, Trim(value) and set value + to be the result. +
  6. +
  7. If value is not one of the + OrientationLockType enum values, or + value is unsupported by the user agent, or the + value cannot be used together with display + mode: +
      +
    1. + Issue a developer warning. +
    2. +
    3. Return the empty string. +
    4. +
    +
  8. +
  9. Return value. +
  10. +
+
+
+

7.9 + start_url member +

+

+ The start_url member is a string that + represents the start URL, which is the URL that the + developer would prefer the user agent load when the user launches the + web application (e.g., when the user clicks on the icon of the web + application from a device's application menu or homescreen). +

+

+ The start_url member is purely advisory, and a + user agent MAY ignore it or provide the end-user the choice + not to make use of it. A user agent MAY also allow the end-user to + modify the URL when, for instance, a bookmark for the web application + is being created or any time thereafter. +

+

+ The steps for processing the start_url member + are given by the following algorithm. The algorithm takes a + manifest manifest, a URL manifest + URL , and a URL document URL. This algorithm + returns a URL. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of the manifest + with argument "start_url". +
  2. +
  3. Let type be Type(value). +
  4. +
  5. If type is not "string" or value is the + empty string: +
      +
    1. If type is not "undefined", issue + a developer warning that the type is unsupported. +
    2. +
    3. Return a new URL whose input is + document URL. +
    4. +
    +
  6. +
  7. Let url be a new URL using value as + input and manifest URL as the base + URL. +
  8. +
  9. If url is failure: + +
  10. +
  11. If url is not same origin as document URL: +
      +
    1. + Issue a developer warning that the + start_url needs to be same-origin as + Document of the top-level browsing context. +
    2. +
    3. Return a new URL whose input is + document URL. +
    4. +
    +
  12. +
  13. Otherwise, return url. +
  14. +
+
+

+ For example, if the value of start_url is + ../start_point.html, and the manifest's URL is + https://example.com/resources/manifest.webmanifest, + then the result of URL parsing would be + https://example.com/start_point.html. +

+
+
+
+

7.10 + theme_color member +

+

+ The theme_color + member serves as the default theme color for an + application context. What constitutes a theme color is + defined in [META-THEME-COLOR]. +

+

+ If the user agent honors the value of the theme_color + member as the default theme color, then that color serves as + the theme color for all browsing contexts to which the + manifest is applied. However, a document may override the + default theme color through the inclusion of a [HTML] + meta element that conforms to [META-THEME-COLOR]. A + user agent MUST return the theme color to the default theme + color when there are no meta elements that conform + to [META-THEME-COLOR] in a document, or the top-level browsing + context is navigated to a URL that is within scope. +

+

+ The steps for processing the theme_color + member are given by the following algorithm. The algorithm + takes a manifest as an argument. This algorithm returns a + string or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of manifest + with argument "theme_color". +
  2. +
  3. If Type(value) is not "string": +
      +
    1. If Type(value) is not "undefined", + optionally issue a developer warning that the type is not + supported. +
    2. +
    3. Return undefined. +
    4. +
    +
  4. +
  5. Otherwise, let potential color be the result of + running [CSS-SYNTAX-3]'s parse a component value algorithm + with value as input. If parsing returns a syntax error, + return undefined. +
  6. +
  7. Let color be the result of attempting to parse + potential color as a CSS color, as per [CSS-SYNTAX-3]. + If parsing fails: +
      +
    1. + Issue a developer warning. +
    2. +
    3. Return undefined. +
    4. +
    +
  8. +
  9. Return color. +
  10. +
+
+ + +
+

7.13 + background_color member +

+

+ The background_color + member describes the expected background color of the web + application. It repeats what is already available in the application + stylesheet but can be used by the user agent to draw the + background color of a web application for which the manifest is known + before the files are actually available, whether they are fetched + from the network or retrieved from disk. +

+

+ The background_color member is only meant to improve the + user experience while a web application is loading and MUST NOT be + used by the user agent as the background color when the web + application's stylesheet is available. +

+

+ The steps for processing the background_color + member are given by the following algorithm. The algorithm + takes a manifest as an argument. This algorithm returns a + string or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of manifest + with argument "background_color". +
  2. +
  3. If Type(value) is not "string": +
      +
    1. If Type(value) is not "undefined", + optionally issue a developer warning that the type is not + supported. +
    2. +
    3. Return undefined. +
    4. +
    +
  4. +
  5. Otherwise, let potential color be the result of + running [CSS-SYNTAX-3]'s parse a component value algorithm + with value as input. If parsing returns a syntax error, + return undefined. +
  6. +
  7. Let color be the result of attempting to parse + potential color as a CSS color, as per [CSS-SYNTAX-3]. + If parsing fails: +
      +
    1. + Issue a developer warning. +
    2. +
    3. Return undefined. +
    4. +
    +
  8. +
  9. Return color. +
  10. +
+
+
+
+

8. + Image object and its members +

+
Issue 361: Ability to define platform-specific icon/splash style

Different platforms have different guidelines regarding their icon style, and an app may need to follow them to look "native".

+ +

Wrong style, even if slightly wrong, can create "uncanny valley", e.g. multiple OSes use square icons with rounded corners, but with a different radius. Icon with a wrong corner radius looks like an amateurish knock-off.

+ +

The spec currently only matches icons on size and density, but not style/theme, so sites would need to resort to user-agent sniffing if they wanted to serve system-specific icons.

+ +

A theoretical solution available in HTML, with help of Moz's nonstandard media queries, is:

+ +
<link rel=icon href="generic.png">
+<link rel=icon media="all and -moz-windows-theme:luna-blue" href="xp.png">
+<link rel=icon media="all and -moz-windows-theme:aero" href="vista.png">
+
+

+ Each image object + represents an image that is used as part of a web application, suitable + to use in various contexts depending on the semantics of the member + that is using the object (e.g., an icon that is part of an application + menu, a splashscreen, etc.). For an image object, this specification + provides developers with a means of specifying the dimensions, optimal + pixel density, and media type of an image (i.e., a "responsive image" + solution [respimg-usecases]). A user agent can use these values to + select an image that is best suited to display on the end-user's device + or most closely matches the end-user's preferences. +

+
+

8.1 + Content security policy of image objects +

+

+ The security policy that governs whether a user agent can + fetch an icon image is governed by the img-src directive + [CSP3] associated with the manifest's owner Document. +

+
+

+ For example, given the following img-src directive in + the Content-Security-Policy HTTP header of the + manifest's owner Document: +

+
HTTP/1.1 200 OK
+Content-Type: text/html
+Content-Security-Policy: img-src icons.example.com
+
+<!doctype>
+<html>
+<link rel="manifest" href="manifest.webmanifest">
+
+
+

+ And given the following manifest.webmanifest: +

+
{
+    "name": "custom manifest",
+    "start_url": "http://boo",
+    "icons": [{
+        "src": "//icons.example.com/lowres"
+      },
+      {
+        "src": "//other.com/hi-res"
+      }]
+}
+

+ The fetching of icon resources from + icons.example.com/lowres would succeed, while fetching + from other.com/hi-res would fail. +

+
+
+
+

8.2 + density member +

+

+ The density member of an image object + is the device pixel density for which this image was designed. The + device pixel density is expressed as the number of dots per 'px' unit + (equivalent to a dppx as defined in [css3-values]). The value is a + positive number greater than 0. If the developer omits the value, the + user agent assumes the value 1.0. +

+

+ The steps for processing a density member of an + image are given by the following algorithm. The algorithm takes + an image object image as an argument and returns a + positive number. +

+
    +
  1. If [[HasOwnProperty]] internal method of image + passing density as the argument returns + false: +
      +
    1. Return 1.0. +
    2. +
    +
  2. +
  3. Let value be the result of calling the + [[GetOwnProperty]] internal method of image passing + "density" as the argument. +
  4. +
  5. Let result be the result of + parseFloat(value). +
  6. +
  7. If result is NaN, +∞, or less than or + equal to +0, then: +
      +
    1. + Issue a developer warning. +
    2. +
    3. Return 1.0. +
    4. +
    +
  8. +
  9. Return result. +
  10. +
+
+
+

8.3 + sizes member +

+

+ The sizes member of an image object is a + string consisting of an unordered set of unique + space-separated tokens which are ASCII case-insensitive + that represents the dimensions of an image. Each keyword is either an + ASCII case-insensitive match for the string "any", or a + value that consists of two valid non-negative integers that do + not have a leading U+0030 DIGIT ZERO (0) character and that are + separated by a single U+0078 LATIN SMALL LETTER X or U+0058 LATIN + CAPITAL LETTER X character. The keywords represent icon sizes in raw + pixels (as opposed to CSS pixels). When multiple image objects + are available, a user agent MAY use the value to decide which icon is + most suitable for a display context (and ignore any that are + inappropriate). +

+

+ The steps for processing a sizes member of an + image are given by the following algorithm. The algorithm takes + an image object image. This algorithm will return a + set. +

+
    +
  1. Let sizes be an empty set. +
  2. +
  3. Let value be the result of calling the + [[GetOwnProperty]] internal method of image + passing "sizes" as the argument. +
  4. +
  5. Let type be Type(value). +
  6. +
  7. If type is not "string", then: +
      +
    1. If type is not "undefined", issue + a developer warning that the type is unsupported. +
    2. +
    3. Return undefined. +
    4. +
    +
  8. +
  9. Otherwise, parse value as if it was a [HTML] + sizes attribute and let keywords be the + result. +
  10. +
  11. For each keyword in keywords: +
      +
    1. Convert keyword to ASCII lowercase and add + the resulting string to sizes. +
    2. +
    +
  12. +
  13. Return sizes. +
  14. +
+
+
+

8.4 + src member +

+

+ The src member of an + image object is a URL from which a user agent can fetch + the image's data. +

+

+ The steps for processing the src member of an + image are given by the following algorithm. The algorithm takes + an image object image, and a URL manifest + URL, which is the URL from which the + manifest was fetched. This algorithm will return a + URL or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of image + passing "src" as the argument. +
  2. +
  3. Let type be Type(value). +
  4. +
  5. If type is not "string", then: +
      +
    1. If type is not "undefined", issue a + developer warning that the type is unsupported. +
    2. +
    3. Return undefined. +
    4. +
    +
  6. +
  7. If Trim(value) is the empty string, then return + undefined. +
  8. +
  9. Otherwise, parse value using manifest + URL as the base URL and return the result. +
  10. +
+
+
+

8.5 + type member +

+

+ The type member of an image object is + a hint as to the media type of the image. The purpose of this member + is to allow a user agent to ignore images of media types it does not + support. +

+

+ There is no default MIME type for image objects. However, for the + purposes of determining the type of the resource, user agents + must expect the resource to be an image. +

+

+ The steps for processing the type member of an + image are given by the following algorithm. The algorithm takes + an image object as an argument, and returns either a + string or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of potential + image passing "type" as the argument. +
  2. +
  3. Let type be Type(value). +
  4. +
  5. If type is not "string", then: +
      +
    1. If type is not "undefined", issue a + developer warning that the type is unsupported. +
    2. +
    3. Return undefined. +
    4. +
    +
  6. +
  7. Trim(value) and set value to be the resulting + string. +
  8. +
  9. If value is not a valid MIME type or the value + of type is not a supported media format, issue a developer + warning and return undefined. +
  10. +
  11. Return value. +
  12. +
+
+
+

8.6 + Processing an array of images +

+

+ The steps for processing an array of images are given by + the following algorithm. The algorithm takes a manifest, a URL + manifest URL, which is the URL from which the + manifest was fetched, and a string that represents the + member name of the member which contains the array of + image objects. This algorithm returns a list of image + objects, which can be empty. +

+
    +
  1. Let images be an empty list. +
  2. +
  3. Let unprocessed images be the result of calling the + [[GetOwnProperty]] internal method of manifest + with member name as the argument. +
  4. +
  5. If unprocessed images is an array, then: +
      +
    1. From unprocessed images, filter out any item where + HasOwnProperty(item,"src") returns false. +
    2. +
    3. For each potential image in the array: +
        +
      1. Let src be the result of running the steps + for processing the src member of an image + with potential image and manifest URL. +
      2. +
      3. If src is undefined, move on to + the next item in unprocessed images (if any are left). +
      4. +
      5. Otherwise, let image be an object with + properties src, type, + sizes, and density. All properties + initially set to undefined. +
      6. +
      7. Set image's src property to be + src. +
      8. +
      9. Let type be the result of running the steps + for processing the type member of an image + passing potential image. +
      10. +
      11. If type is not undefined, set + image's type property to be + type. +
      12. +
      13. Let sizes be the list that results from running + the steps for processing a sizes member of an + image passing potential image. +
      14. +
      15. If sizes is not undefined, set + image's sizes property to be + sizes. +
      16. +
      17. Let density be the result from running the + steps for processing a density member of an + image passing potential + image. +
      18. +
      19. If density is not undefined, set + image's density property to be + density. +
      20. +
      21. Append image to images. +
      22. +
      +
    4. +
    +
  6. +
  7. Otherwise, if unprocessed images is not + undefined: +
      +
    1. + Issue a developer warning that the type is not + supported. +
    2. +
    +
  8. +
  9. Return images. +
  10. +
+
+
+
+

9. + Application object and its members +

+

+ Each application object represents an application related to + the web application. An application object has three properties: a + platform which represents the platform it is associated + to, a url which represents the URL where the application + can be found and an id which can be used as information + additional to the URL or instead of the URL, depending on the platform. + A valid application object MUST have platform and + either a url or an id (or both). +

+
+

+ In the following example, the web application is listing two + different related applications, one on Google Play Store and the + other one on the iTunes Store: +

+
{
+  "related_applications": [
+      {
+        "platform": "play",
+        "url": "https://play.google.com/store/apps/details?id=com.example.app1",
+        "id": "com.example.app1"
+      }, {
+        "platform": "itunes",
+        "url": "https://itunes.apple.com/app/example-app1/id123456789"
+      }]
+ }
+
+
+
Issue

+ Where should the platform expected value be listed? +

+
+

9.1 + platform member +

+

+ The platform member of an + application object represents the platform on which the application + can be found. +

+

+ The steps for processing the platform member of an + application are given by the following algorithm. The algorithm + takes an application object application. This + algorithm will return a string or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of application + passing "platform" as the argument. +
  2. +
  3. If Type(value) is not "string": +
      +
    1. If Type(value) is not "undefined", + optionally issue a developer warning that the type is not + supported. +
    2. +
    3. Return undefined. +
    4. +
    +
  4. +
  5. Otherwise, Trim(value) and return the result. +
  6. +
+
+
+

9.2 + url member +

+

+ The url member of an application + object represents the URL at which the application can be + found. +

+

+ The steps for processing the url member of an + application are given by the following algorithm. The algorithm + takes an application object application. This + algorithm will return a URL or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of application + passing "url" as the argument. +
  2. +
  3. If Type(value) is not "string": +
      +
    1. If Type(value) is not "undefined", + optionally issue a developer warning that the type is not + supported. +
    2. +
    3. Return undefined. +
    4. +
    +
  4. +
  5. Trim(value) and set value to be resulting + string. +
  6. +
  7. Parse value and, if the result is not + failure, return the result; otherwise return undefined. +
  8. +
+
+
+

9.3 + id member +

+

+ The id member of an application + object represents the id which is used to represent the application + on the platform. +

+

+ The steps for processing the id member of an + application are given by the following algorithm. The algorithm + takes an application object application. This + algorithm will return a string or undefined. +

+
    +
  1. Let value be the result of calling the + [[GetOwnProperty]] internal method of application + passing "id" as the argument. +
  2. +
  3. If Type(value) is not "string": +
      +
    1. If Type(value) is not "undefined", + optionally issue a developer warning that the type is not + supported. +
    2. +
    3. Return undefined. +
    4. +
    +
  4. +
  5. Otherwise, Trim(value) and return the result. +
  6. +
+
+
+
+

10. + Common conventions and dependencies +

+

+ The + [[GetOwnProperty]] operation and the abstract operation + + hasOwnProperty , + parseFloat(string) function, and the + Type(x) notation are defined in + [ECMASCRIPT]. +

+

+ When instructed to Trim(x), a user agent MUST + behave as if [ECMASCRIPT]'s + String.prototype.trim() function had been called on the string + x. +

+

+ As the manifest uses the JSON format, this specification relies on the + types defined in [ECMA-404] specification: namely object, + array, number, string, + true, false, and null. Strict + type checking is not enforced by this specification. Instead, each + member's definition specifies the steps required to process a + particular member and what to do when a type does not match what is + expected. +

+

+ The URL concept and + URL parser + are defined in [WHATWG-URL]. +

+

+ The + default orientation concept and the + OrientationLockType enum, are defined in + [SCREEN-ORIENTATION]. +

+

+ The algorithm to parse + a component value is defined in [CSS-SYNTAX-3]. +

+

+ The + manifest-src, + img-src, and + default-src directives are defined in + [CSP3]. +

+

+ The IsStructurallyValidLanguageTag + and CanonicalizeLanguageTag + abstract operations are defined in [ECMAS-402]. +

+

+ The following are defined in [FETCH]: +

+ +

+ The following are defined in [HTML]: +

+ +
+
+

11. + IANA considerations +

+

+ The following registrations are for community review and will be + submitted to the IESG for + review, approval, and registration with IANA. +

+
+

11.1 + Media type registration +

+

+ This section contains the required text for MIME media type + registration with IANA. +

+

+ The media type for a manifest is + application/manifest+json. +

+

+ If the protocol over which the manifest is transferred supports the + [MIME-TYPES] specification (e.g. HTTP), it is RECOMMENDED that the + manifest be labeled with the media type for a manifest. +

+
+
+ Type name: +
+
+ application +
+
+ Subtype name: +
+
+ manifest+json +
+
+ Required parameters: +
+
+ N/A +
+
+ Optional parameters: +
+
+ N/A +
+
+ Encoding considerations: +
+
+ Same as for application/json +
+
+ Security and privacy considerations: +
+
+
Issue 348: Triage privacy issues

Lots of feedback here...
+https://lists.w3.org/Archives/Public/public-privacy/2015JanMar/0118.html

+ +

cc @npdoty, who we need to work with to address the above...

+

+ This specification does not directly deal with high-value data. + However, installed web applications and their data could + be seen as "high value" (particularly from a privacy + perspective). +

+

+ As the manifest format is JSON and will commonly be encoded using + [UNICODE], the security considerations described in + [ECMA-404] and [UNICODE-SECURITY] apply. In addition, + because there is no way to prevent developers from including + custom/unrestrained data in a manifest, implementors need + to impose their own implementation-specific limits on the values + of otherwise unconstrained member types, e.g. to prevent denial + of service attacks, to guard against running out of memory, or to + work around platform-specific limitations. +

+

+ Web applications will generally contain ECMAScript, HTML, CSS + files, and other media, which are executed in a sand-boxed + environment. As such, implementors need to be aware of the + security implications for the types they support. Specifically, + implementors need to consider the security implications outlined + in at least the following specifications: [CSS-MIME], + [ECMAScript-MIME], [HTML]. +

+

+ As web applications can contain content that is able to + simultaneously interact with the local device and a remote host, + implementors need to consider the privacy implications resulting + from exposing private information to a remote host. Mitigation + and in-depth defensive measures are an implementation + responsibility and not prescribed by this specification. However, + in designing these measures, implementors are advised to enable + user awareness of information sharing, and to provide easy access + to interfaces that enable revocation of permissions. +

+

+ As this specification allows for the declaration of URLs within + certain members of a manifest, implementors need to consider the + security considerations discussed in the [WHATWG-URL] + specification. Implementations intending to display IRIs and + IDNA addresses + found in the manifest are strongly encouraged to follow the + security advice given in [UNICODE-SECURITY]. +

+

+ Developers need to be aware of the security considerations + discussed throughout the [CSP3] specification, particularly in + relation to making data: a valid source for the + purpose of inlining a manifest. Doing so can enable XSS + attacks by allowing a manifest to be included directly in the + document itself; this is best avoided completely. +

+
+
+ Applications that use this media type: +
+
+ Web browsers +
+
+ Additional information: +
+
+
+
+ Magic number(s): +
+
+ N/A +
+
+ File extension(s): +
+
+ .webmanifest +
+
+ Macintosh file type code(s): +
+
+ TEXT +
+
+
+
+ Person & email address to contact for further information: +
+
+ The Web + Platform Working Group can be contacted at + public-webapps@w3.org. +
+
+ Intended usage: +
+
+ COMMON +
+
+ Restrictions on usage: +
+
+ none +
+
+ Author: +
+
+ W3C's Web Platform Working Group. +
+
+ Change controller: +
+
+ W3C. +
+
+
+ +
+

12. Conformance

+

+ As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, + and notes in this specification are non-normative. Everything else in this specification is + normative. +

+

The key words MAY, MUST, MUST NOT, OPTIONAL, RECOMMENDED, REQUIRED, and SHOULD are + to be interpreted as described in [RFC2119]. +

+ +

+ There is only one class of product that can claim conformance to this + specification: a user agent. +

+
Note

+ Although this specification is primarily targeted at web browsers, it + is feasible that other software could also implement this specification + in a conforming manner. For instance, search engines, or crawlers, + could find and process manifests to build up catalogs of sites that + potentially work as installable web applications. +

+
+

12.1 + Extensibility +

This section is non-normative.

+

+ This specification is designed to be extensible. Other specifications + are encouraged to define new members for the manifest. However, in + doing so, please follow the conventions used in this specification. + In particular, use the extension point to hook into the + steps for processing a manifest. Also, be sure to specify the + steps for processing your particular member in the manner set forth + in this specification. This will help keep this part of the platform + consistent. +

+

+ When specifying a new member, don't override or monkey patch + anything defined in this specification. Also, don't assume your + member will be processed before or after any other member. Keep your + new member, and its processing, atomic and self contained. Note also + that implementations are free to ignore any member they do not + recognize or support. +

+

+ If you are writing a specification and temporarily want to patch this + specification to help implementations along, file a bug so the + community is informed of what you are trying to do. +

+
+

12.1.1 + Proprietary manifest members +

This section is non-normative.

+

+ Although proprietary extensions are undesirable, they can't + realistically be avoided. As such, the RECOMMENDED way to add a new + proprietary manifest member as an extension is to use a vendor + prefix. +

+

+ The following is an example of two hypothetical vendor extensions. +

+
Example 6: vendor extensions
{
+  ...
+  "webkit_fancy_feature": "some/url/img",
+  "moz_awesome_thing": { ... }
+  ...
+}
+
+
+
+ +
+

B. + JSON Schema +

+

+ Developers interested in validating manifest documents can find + an unofficial JSON + schema for the manifest format at schemastore.org. It is licensed under + Apache + 2.0. It is kindly maintained by Mads Kristensen. If you find + any issues with the JSON schema, please file a bug at + the SchemaStore + repository on GitHub. +

+
+
+

C. + Internationalization +

+
Issue 323: Add note about i18n

The note should include:

+ +
    +
  • + The spec should detail how we expect developers to localize content. Like by: <link rel=manifest href='manifest?lang=en'> +
  • +
  • + An example showing the use of a language other than English.
  • +
  • + Add discussion of obtaining a correctly localized reference, particularly for use in cases where the source page itself has used language negotiation.
  • +
+
+

D. Issue Summary

  • Issue 363: Deep linking - need proper use cases and requirements analysis
  • Issue 384: Updating is under/incorrectly specified
  • Issue 380: Provide better examples of scope
  • Issue 372: A means to control how long a splash screen should remain on screen.
  • Issue 365: Is prefer_related_applications too simplistic?
  • Issue 361: Ability to define platform-specific icon/splash style
  • Issue 348: Triage privacy issues
  • Issue 323: Add note about i18n
+
+

E. + Acknowledgments +

+

+ This document reuses text from the [HTML] specification, edited by + Ian Hickson, as permitted by the license of that specification. +

+

Dave Raggett and Dominique Hazael-Massieux contributed to this specification via the HTML5Apps project.

+
+ + +

F. References

F.1 Normative references

[BCP47]
A. Phillips; M. Davis. IETF. Tags for Identifying Languages. September 2009. IETF Best Current Practice. URL: https://tools.ietf.org/html/bcp47 +
[CORS]
Anne van Kesteren. W3C. Cross-Origin Resource Sharing. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/cors/ +
[CSP3]
Mike West. W3C. Content Security Policy Level 3. 26 January 2016. W3C Working Draft. URL: http://www.w3.org/TR/CSP3/ +
[CSS-MIME]
H. Lie; B. Bos; C. Lilley. IETF. The text/css Media Type. March 1998. Informational. URL: https://tools.ietf.org/html/rfc2318 +
[CSS-SYNTAX-3]
Tab Atkins Jr.; Simon Sapin. W3C. CSS Syntax Module Level 3. 20 February 2014. W3C Candidate Recommendation. URL: http://www.w3.org/TR/css-syntax-3/ +
[ECMA-404]
Ecma International. The JSON Data Interchange Format. 1 October 2013. Standard. URL: http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf +
[ECMAS-402]
Ecma International. ECMAScript Internationalization API Specification. URL: https://tc39.github.io/ecma402/ +
[ECMASCRIPT]
Ecma International. ECMAScript Language Specification. URL: https://tc39.github.io/ecma262/ +
[ECMAScript-MIME]
B. Hoehrmann. IETF. Scripting Media Types. April 2006. Informational. URL: https://tools.ietf.org/html/rfc4329 +
[FETCH]
Anne van Kesteren. WHATWG. Fetch Standard. Living Standard. URL: https://fetch.spec.whatwg.org/ +
[HTML]
Ian Hickson. WHATWG. HTML Standard. Living Standard. URL: https://html.spec.whatwg.org/multipage/ +
[MEDIAQ]
Florian Rivoal et al. W3C. Media Queries. 19 June 2012. W3C Recommendation. URL: http://www.w3.org/TR/css3-mediaqueries/ +
[META-THEME-COLOR]
WHATWG. The 'theme-color' meta extension. Living Standard. URL: https://github.com/whatwg/meta-theme-color +
[MIME-TYPES]
N. Freed; N. Borenstein. IETF. Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types. November 1996. Draft Standard. URL: https://tools.ietf.org/html/rfc2046 +
[RFC2119]
S. Bradner. IETF. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 +
[RFC5646]
A. Phillips, Ed.; M. Davis, Ed.. IETF. Tags for Identifying Languages. September 2009. Best Current Practice. URL: https://tools.ietf.org/html/rfc5646 +
[SCREEN-ORIENTATION]
Mounir Lamouri; Marcos Caceres. W3C. The Screen Orientation API. 23 December 2015. W3C Working Draft. URL: http://www.w3.org/TR/screen-orientation/ +
[UNICODE]
The Unicode Consortium. The Unicode Standard. URL: http://www.unicode.org/versions/latest/ +
[UNICODE-SECURITY]
Mark Davis; Michel Suignard. Unicode Consortium. Unicode Security Considerations. URL: http://www.unicode.org/reports/tr36/ +
[URL]
Anne van Kesteren; Sam Ruby. W3C. URL. 9 December 2014. W3C Working Draft. URL: http://www.w3.org/TR/url-1/ +
[WHATWG-URL]
Anne van Kesteren; Sam Ruby. WHATWG. URL Standard. Living Standard. URL: https://url.spec.whatwg.org/ +

F.2 Informative references

[WHATWG-FULLSCREEN]
Anne van Kesteren. WHATWG. Fullscreen API Standard. Living Standard. URL: https://fullscreen.spec.whatwg.org/ +
[css3-values]
Tab Atkins Jr.; Elika Etemad. W3C. CSS Values and Units Module Level 3. 11 June 2015. W3C Candidate Recommendation. URL: http://www.w3.org/TR/css-values/ +
[cssom-view]
Simon Pieters; Glenn Adams. W3C. CSSOM View Module. 17 December 2013. W3C Working Draft. URL: http://www.w3.org/TR/cssom-view/ +
[respimg-usecases]
Marcos Caceres; Mathew Marquis; Yoav Weiss; David Newton. W3C. Use Cases and Requirements for Standardizing Responsive Images. 7 November 2013. W3C Note. URL: http://www.w3.org/TR/respimg-usecases/ +
diff --git a/test/docs/metadata/csvw-ucr.html b/test/docs/metadata/csvw-ucr.html new file mode 100644 index 000000000..7271b621f --- /dev/null +++ b/test/docs/metadata/csvw-ucr.html @@ -0,0 +1,4439 @@ + + + + + + CSV on the Web: Use Cases and Requirements + + + + + + +

Abstract

+

A large percentage of the data published on the Web is tabular data, commonly published as + comma separated values (CSV) files. The CSV on the Web Working Group aim to specify + technologies that provide greater interoperability for data dependent applications on the + Web when working with tabular datasets comprising single or multiple files using CSV, or + similar, format.

+

This document lists the use cases compiled by the Working Group that are considered + representative of how tabular data is commonly used within data dependent applications. The + use cases observe existing common practice undertaken when working with tabular data, often + illustrating shortcomings or limitations of existing formats or technologies. This document + also provides a set of requirements derived from these use cases that have been used to + guide the specification design.

+

Status of This Document

+ + + +

+ This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the W3C technical reports index at http://www.w3.org/TR/. +

+ + + +

This is a draft document which may be merged into another document or eventually make its + way into being a standalone Working Draft.

+ + + +

+ This document was published by the CSV on the Web Working Group as a Working Group Note. + + + If you wish to make comments regarding this document, please send them to + public-csv-wg@w3.org + (subscribe, + archives). + + + + + + + All comments are welcome. + + +

+ + + + +

+ Publication as a Working Group Note does not imply endorsement by the W3C + Membership. This is a draft document and may be updated, replaced or obsoleted by other + documents at any time. It is inappropriate to cite this document as other than work in + progress. +

+ + + +

+ + This document was produced by + + a group + operating under the + 5 February 2004 W3C Patent + Policy. + + + + + W3C maintains a public list of any patent + disclosures + + made in connection with the deliverables of + + the group; that page also includes + + instructions for disclosing a patent. An individual who has actual knowledge of a patent + which the individual believes contains + Essential + Claim(s) must disclose the information in accordance with + section + 6 of the W3C Patent Policy. + + +

+ +

This document is governed by the 1 September 2015 W3C Process Document. +

+ + + + + + +

Table of Contents

+ +
+

1. Introduction

+

A large percentage of the data published on the Web is tabular data, commonly published as + comma separated values (CSV) files. CSV files may be of a significant size but they can be + generated and manipulated easily, and there is a significant body of software available to + handle them. Indeed, popular spreadsheet applications (Microsoft Excel, iWork’s Numbers, or + OpenOffice.org) as well as numerous other applications can produce and consume these files. + However, although these tools make conversion to CSV easy, it is resisted by some publishers + because CSV is a much less rich format that can't express important detail that the + publishers want to express, such as annotations, the meaning of identifier codes etc.

+

Existing formats for tabular data are format-oriented and hard to process (e.g. Excel); + un-extensible (e.g. CSV/TSV); or they assume the use of particular technologies (e.g. SQL + dumps). None of these formats allow developers to pull in multiple data sets, manipulate, + visualize and combine them in flexible ways. Other information relevant to these datasets, + such as access rights and provenance, is not easy to find. CSV is a very useful and simple + format, but to unlock the data and make it portable to environments other than the one in + which it was created, there needs to be a means of encoding and associating relevant + metadata.

+

To address these issues, the CSV on the Web Working Group seeks to provide:

+
    +
  • Metadata vocabulary for CSV data
  • +
  • Access methods for CSV Metadata
  • +
  • Mapping mechanism for transforming CSV into various formats (e.g., RDF [rdf11-concepts], JSON [RFC7159], or XML [xml])
  • +
+

In order to determine the scope of and elicit the requirements for this extended + CSV format (CSV+) a set of use cases have been compiled. Each use case provides a narrative + describing how a representative user works with tabular data to achieve their goal, + supported, where possible, with example datasets. The use cases observe existing common + practice undertaken when working with tabular data, often illustrating shortcomings or + limitations of existing formats or technologies. It is anticipated that the additional + metadata provided within the CSV+ format, when coupled with metadata-aware tools, will + simplify how users work with tabular data. As a result, the use cases seek to identify where + user effort may be reduced.

+

A set of requirements, used to guide the development of the CSV+ specification, have been + derived from the compiled use cases.

+
+
+

2. Use Cases

+

The use cases below describe many applications of tabular data. Whilst there are many + different variations of tabular data, all the examples conform to the definition of + tabular data defined in the Model for Tabular Data and Metadata on the Web [tabular-data-model]:

+

Tabular data is data that is structured into rows, each of which + contains information about some thing. Each row contains the same number of fields + (although some of these fields may be empty), which provide values of properties of + the thing described by the row. In tabular data, fields within the same column provide + values for the same property of the thing described by the particular row.

+

In selecting the use cases we have reviewed a number of row oriented data + formats that, at first glance, appear to be tabular data. However, closer inspection + indicates that one or other of the characteristics of tabular data were not present. + For example, the HL7 format, + from the health informatics domain defines a separate schema for each row (known as + a "segment" in that format) which means that HL7 messages do not have a regular + number of columns for each row.

+
+

2.1 Use Case #1 - Digital preservation of government records

+

+ (Contributed by Adam Retter; supplemental information about use of XML provided by Liam Quin) +

+

The laws of England and Wales place obligations upon departments and The National Archives for the collection, + disposal and preservation of records. Government departments are obliged within the Public Records Act 1958 sections 3, 4 and 5 to select, transfer, preserve and make + available those records that have been defined as public records. These obligations apply + to records in all formats and media, including paper and digital records. Details + concerning the selection and transfer of records can be found here.

+

Departments transferring records to TNA must catalogue or list the selected records + according to The National Archives' defined cataloguing principles and standards. + Cataloguing is the process of writing a description, or Transcriptions of Records + for the records being transferred. Once each Transcription of Records is added to the + Records Catalogue, records can be subsequently discovered and accessed using the supplied + descriptions and titles.

+

TNA specifies what information should be provided within a Transcriptions of Records and + how that information should be formatted. A number of formats and syntaxes are supported, + including RDF. However, the predominant format used for the exchange of Transcriptions of + Records is CSV as the government departments providing the Records lack either the + technology or resources to provide metadata in the XML and RDF formats preferred by the + TNA.

+

A CSV-encoded Transcriptions of Records typically describes a set of Records, often + organised within a hierarchy. As a result, it is necessary to describe the + interrelationships between Records within a single CSV file.

+

Each row within a CSV file relates to a particular Record and is allocated a unique + identifier. This unique identifier behaves as a primary key for the Record within the + scope of the CSV file and is used when referencing that Record from within other Record + transcriptions. The unique identifier is unique within the scope of the datafile; in + order for the Record to be referenced from outside this datafile, the local identifier + must be mapped to a globally unique identifier such as a URI.

+ +

+ Requires: + PrimaryKey, + URIMapping and + ForeignKeyReferences. +

+ +

Upon receipt by TNA, each of the Transcriptions of Records is validated against the (set + of) centrally published data definition(s); it is essential that received CSV metadata + comply with these specifications to ensure efficient and error free ingest into the + Records Catalogue.

+

The validation applied is dependent on the type of entity described in each row. Entity type + is specified in a specific column (e.g. type).

+

The data definition file, or CSV Schema, used by the CSV Validation Tool effectively + forms the basis of a formal contract between TNA and supplying organisations. For more + information on the CSV Validation Tool and CSV Schema developed by TNA please refer to the + online + documentation.

+

The CSV Validation Tool is written in Scala + version 2.10.

+ +

+ Requires: + WellFormedCsvCheck and + CsvValidation. +

+ +

Following validation, the CSV-encoded Transcriptions of Records are transformed into RDF + for insertion into the triple store that underpins the Records Catalogue. The CSV is initially + + transformed into an interim XML format using XSLT and then processed further using a mix + of XSLT, Java and Scala to create RDF/XML. The CSV files do + not include all the information required to undertake the transformation, e.g. defining + which RDF properties are to be used when creating triples for the data value in each cell. + As a result, bespoke software has been created by TNA to supply the necessary additional + information during the CSV to RDF transformation process. The availability of generic + mechanisms to transform CSV to RDF would reduce the burden of effort within TNA when + working with CSV files.

+ +

+ Requires: + SyntacticTypeDefinition, SemanticTypeDefinition and CsvToRdfTransformation. +

+ +

+ In this particular case, RDF is the target format for the conversion of the CSV-encoded + Transcriptions of Records. However, the conversion of CSV to XML (in this case used as + an interim conversion step) is illustrative of a common data conversion workflow.

+

+ The transformation outlined above is typical of common practice in that it uses a + freely-available XSLT transformation or XQuery parser (in this case + Andrew Welch's CSV to + XML converter in XSLT 2.0) which is then modified to meet the specific usage requirements.

+

The resulting XML document can then be further transformed using XSLT to create + XHTML documentation - perhaps including charts such as histograms to present summary data.

+ +

+ Requires: + CsvToXmlTransformation. +

+ +
+
+

2.2 Use Case #2 - Publication of National Statistics

+

+ (Contributed by Jeni Tennison) +

+

The Office for National Statistics (ONS) is the UK’s + largest independent producer of official statistics and is the recognised national + statistical institute for the UK. It is responsible for collecting and publishing + statistics related to the economy, population and society at national, regional and local + levels.

+

Sets of statistics are typically grouped together into datasets comprising collections of + related tabular data. Within their underlying information systems, ONS maintains a clear + separation between the statistical data itself and the metadata required for interpretation. + ONS classify the metadata into two categories:

+
    +
  • structural metadata: dimensionality, sort order, axis metadata, axis ordering etc.
  • +
  • reference metadata: linked descriptive information.
  • +
+

These datasets are published on-line in both CSV format and as Microsoft + Excel Workbooks that have been manually assembled from the underlying data.

+

For example, dataset QS601EW Economic activity, derived from the + 2011 Census, is available as a precompiled Microsoft Excel Workbook for several sets of + administrative geographies, e.g. + 2011 Census: QS601EW Economic activity, local authorities in England and Wales, and in + CSV form via the ONS Data Explorer.

+

The ONS Data Explorer presents the user with a list of available datasets. A user may choose + to browse through the entire list or filter that list by topic. To enable the user to determine + whether or not a dataset meets their need, summary information is available for each dataset.

+

QS601EW Economic activity provides the following summary information:

+
    +
  • title: Economic activity
  • +
  • dimensions: Economic activity (T016A), 2011 Administrative Hierarchy, 2011 Westminster Parliamentary Constituency Hierarchy
  • +
  • dataset population: All usual residents aged 16 to 74
  • +
  • coverage: England and Wales
  • +
  • area types (list omitted here for brevity)
  • +
  • textual description of dataset
  • +
  • publication information
  • +
  • contact details
  • +
+ +

+ Requires: + AnnotationAndSupplementaryInfo. +

+ +

Once the required dataset has been selected, the user is prompted to choose how they + would like the statistical data to be aggregated. In the case of QS601EW Economic + activity, the user is required to choose between the two mutually exclusive + geography types: 2011 Administrative Hierarchy and 2011 Westminster Parliamentary + Constituency Hierarchy. Effectively, the QS601EW Economic activity + dataset is partitioned into two separate tables for publication.

+ +

+ Requires: + GroupingOfMultipleTables. +

+ +

The user is also provided with an option to sub-select only the elements of the + dataset that they deem pertinent for their needs. In the case of QS601EW Economic + activity the user may select data from up to 200 geographic areas within the + dataset to create a data subset that meets their needs. The data subset may be viewed + on-line (presented as an HTML table) or downloaded in CSV or Microsoft Excel formats.

+ +

+ Requires: + CsvAsSubsetOfLargerDataset. +

+ +

An example extract of data for England and Wales in CSV form is provided below. + The data subset is provided as a compressed file containing both a CSV formatted data file + and a complementary html file containing the reference metadata. White space has been added + for clarity. File = + CSV_QS601EW2011WARDH_151277.zip

+ +
Example 1
"QS601EW"
+"Economic activity"
+"19/10/13"
+
+               ,                 ,                                   "Count",                            "Count",                                   "Count",                                   "Count",                                                       "Count",                                                       "Count",                                                          "Count",                                                          "Count",                          "Count",                                 "Count",                              "Count",                         "Count",                                                        "Count",                                              "Count",                                            "Count",                       "Count"
+               ,                 ,                                  "Person",                           "Person",                                  "Person",                                  "Person",                                                      "Person",                                                      "Person",                                                         "Person",                                                         "Person",                         "Person",                                "Person",                             "Person",                        "Person",                                                       "Person",                                             "Person",                                           "Person",                      "Person"
+               ,                 ,               "Economic activity (T016A)",        "Economic activity (T016A)",               "Economic activity (T016A)",               "Economic activity (T016A)",                                   "Economic activity (T016A)",                                   "Economic activity (T016A)",                                      "Economic activity (T016A)",                                      "Economic activity (T016A)",      "Economic activity (T016A)",             "Economic activity (T016A)",          "Economic activity (T016A)",     "Economic activity (T016A)",                                    "Economic activity (T016A)",                          "Economic activity (T016A)",                        "Economic activity (T016A)",   "Economic activity (T016A)"
+"Geographic ID","Geographic Area","Total: All categories: Economic activity","Total: Economically active: Total","Economically active: Employee: Part-time","Economically active: Employee: Full-time","Economically active: Self-employed with employees: Part-time","Economically active: Self-employed with employees: Full-time","Economically active: Self-employed without employees: Part-time","Economically active: Self-employed without employees: Full-time","Economically active: Unemployed","Economically active: Full-time student","Total: Economically inactive: Total","Economically inactive: Retired","Economically inactive: Student (including full-time students)","Economically inactive: Looking after home or family","Economically inactive: Long-term sick or disabled","Economically inactive: Other"
+    "E92000001",        "England",                                "38881374",                         "27183134",                                 "5333268",                                "15016564",                                                      "148074",                                                      "715271",                                                         "990573",                                                        "1939714",                        "1702847",                               "1336823",                           "11698240",                       "5320691",                                                      "2255831",                                            "1695134",                                          "1574134",                      "852450"
+    "W92000004",          "Wales",                                 "2245166",                          "1476735",                                  "313022",                                  "799348",                                                        "7564",                                                       "42107",                                                          "43250",                                                         "101108",                          "96689",                                 "73647",                             "768431",                        "361501",                                                       "133880",                                              "86396",                                           "140760",                       "45894"
+ +

Key characteristics of the CSV file are:

+
    +
  • summary information for entire table provided at beginning of file
  • +
  • multiple header lines
  • +
  • comma delimited cells
  • +
  • double quote escaping of text
  • +
+ + +

+ Requires: + MultipleHeadingRows and + AnnotationAndSupplementaryInfo. +

+ + +

Correct interpretation of the statistics requires additional qualification or + awareness of context. To achieve this the complementary html file includes supplementary information and + annotations pertinent to the data published in the accompanying CSV file. Annotation or references may + be applied to:

+
    +
  • a group of tables
  • +
  • an entire table
  • +
  • a row
  • +
  • a column
  • +
  • an individual cell
  • +
+ +

+ Requires: + AnnotationAndSupplementaryInfo. +

+

Furthermore, these statistical data sets make frequent use of predefined category codes + and geographic regions. Dataset QS601EW Economic activity includes two + examples:

+
    +
  • topic category T016A; identifying the statistical measure type - in this case, + whether a person aged 16 or over was in work or looking for work in the week before the census
  • +
  • geographic area codes for 2011 Administrative Hierarchy and 2011 Westminster Parliamentary + Constituency Hierarchy
  • +
+

At present there is no standardised mechanism to associate the category codes, + provided as plain text, with their authoritative definitions.

+

+ Requires: + AssociationOfCodeValuesWithExternalDefinitions. +

+

Finally, reuse of the statistical data is also inhibited by a lack of explicit definition + of the meaning of column headings.

+

+ Requires: + SemanticTypeDefinition. +

+
+
+

2.3 Use Case #3 - Creation of consolidated global land surface temperature climate + databank

+

+ (Contributed by Jeremy Tandy) +

+

Climate change and global warming have become one of the most pressing environmental + concerns in society today. Crucial to predicting future change is an understanding of + the world’s historical climate, with long duration instrumental records of climate being + central to that goal. Whilst there is an abundance of data recording the climate at + locations the world over, the scrutiny under which climate science is put means that much + of this data remains unused leading to a paucity of data in some regions with which to + verify our understanding of climate change.

+ +

The International Surface Temperature + Initiative seeks to create a consolidated global land surface temperatures databank + as an open and freely available resource to climate scientists.

+ +

To achieve this goal, climate datasets, known as “decks”, are gathered from participating + organisations and merged into a combined dataset using a scientifically peer reviewed method which assesses the data records for inclusion against a variety of + criteria.

+ +

Given the need for openness and transparency in creating the databank, it is essential + that the provenance of the source data is clear. Original source data, particularly for + records captured prior to the mid-twentieth century, may be in hard-copy form. In order to + incorporate the widest possible scope of source data, the International Surface + Temperature Initiative is supported by data rescue + activities to digitise hard copy records.

+ +

The data is, where possible, published in the following four stages:

+
    +
  • Stage 0: raw digital image of hard copy records or information as to hard copy + location
  • +
  • Stage 1: data in native format provided
  • +
  • Stage 2: data converted into a common format and with provenance and version control + information appended
  • +
  • Stage 3: merged collation of stage 2 data within a single consolidated dataset
  • +
+ +

The Stage 1 data is typically provided in tabular form - the most common variant is + white-space delimited ASCII files. Each data deck comprises multiple files which are + packaged as a compressed tar ball (.tar.gz). Included within the compressed + tar ball package, and provided alongside, is a read-me file providing unstructured + supplementary information. Summary information is often embedded at the top of each + file.

+ +

For example, see the Ugandan Stage 1 data deck (local copy) and associated readme file (local copy).

+ +

The Ugandan Stage 1 data deck appears to be comprised of two discrete datasets, each + partitioned into a sub-directory within the tar ball: uganda-raw and + uganda-bestguess. Each sub-directory includes a Microsoft Word document + providing supplementary information about the provenance of the dataset; of particular + note is that uganda-raw is collated from 9 source datasets whilst + uganda-bestguess provides what is considered by the data publisher to be + the best set of values with duplicate values discarded.

+ +

+ Requires: + AnnotationAndSupplementaryInfo. +

+ +

Dataset uganda-raw is split into 96 discrete files, each providing maximum, + minimum or mean monthly air temperature for one of the 32 weather observation stations + (sites) included in the data set. Similarly, dataset uganda-bestguess is + partitioned into discrete files; in this case just 3 files each of which provide maximum, + minimum or mean monthly air temperature data for all sites. The mapping from data file to + data sub-set is described in the Microsoft Word document.

+ +

+ Requires: + CsvAsSubsetOfLargerDataset. +

+ +

A snippet of the data indicating maximum monthly temperature for Entebbe, Uganda, from + uganda-raw is provided below. File = 637050_ENTEBBE_tmx.txt

+ +
Example 2
637050  ENTEBBE
+5
+ENTEBBE BEA     0.05    32.45   3761F
+ENTEBBE GHCNv3G 0.05    32.45   1155M
+ENTEBBE ColArchive      0.05    32.45   1155M
+ENTEBBE GSOD    0.05    32.45   1155M
+ENTEBBE NCARds512       0.05    32.755  1155M
+
+Tmax
+{snip}
+1935.04	27.83	27.80	27.80	-999.00	-999.00
+1935.12	25.72	25.70	25.70	-999.00	-999.00
+1935.21	26.44	26.40	26.40	-999.00	-999.00
+1935.29	25.72	25.70	25.70	-999.00	-999.00
+1935.37	24.61	24.60	24.60	-999.00	-999.00
+1935.46	24.33	24.30	24.30	-999.00	-999.00
+1935.54	24.89	24.90	24.90	-999.00	-999.00
+{snip}
+ +

The key characteristics are:

+
    +
  • white space delimited; this is not strictly a CSV file
  • +
  • summary information pertinent to the “data rows” is included at the beginning of the + data file
  • +
  • row, column and cell value interpretation is informed by accompanying Microsoft Word + document; human intervention is required to unambiguously determine semantics, e.g. the + meaning of each column, the unit of measurement
  • +
  • the observed property is defined as “Tmax”; there is no reference to an authoritative + definition describing that property
  • +
  • there is no header line providing column names
  • +
  • the year and month (column 1) is expressed as a decimal value; e.g. 1901.04 – + equivalent to January, 1901
  • +
  • multiple temperature values (“replicates”) are provided for each row; one from each of + the sources defined in the header, e.g. BEA (British East Africa), + GHCNv3G, ColArchive, GSOD and + NCARds512
  • +
  • the provenance of specific cell values cannot be asserted; for example, data values + for 1935 observed at Entebbe are digitised from digital images published in PDF (local copy)
  • +
+

A snippet of the data indicating maximum monthly temperature for all stations in Uganda + from uganda-bestguess is provided below (truncated to 9 columns). File = ug_tmx_jrc_bg_v1.0.txt

+ +
Example 3
ARUA	BOMBO	BUKALASA	BUTIABA	DWOLI	ENTEBBE AIR	FT PORTAL	GONDOKORO	[…]
+{snip}
+1935.04	-99.00	-99.00	-99.00	-99.00	-99.00	27.83	-99.00	-99.00	[…]
+1935.12	-99.00	-99.00	-99.00	-99.00	-99.00	25.72	-99.00	-99.00	[…]
+1935.21	-99.00	-99.00	-99.00	-99.00	-99.00	26.44	-99.00	-99.00	[…]
+1935.29	-99.00	-99.00	-99.00	-99.00	-99.00	25.72	-99.00	-99.00	[…]
+1935.37	-99.00	-99.00	-99.00	-99.00	-99.00	24.61	-99.00	-99.00	[…]
+1935.46	-99.00	-99.00	-99.00	-99.00	-99.00	24.33	-99.00	-99.00	[…]
+1935.54	-99.00	-99.00	-99.00	-99.00	-99.00	24.89	-99.00	-99.00	[…]
+{snip}
+ +

Many of the characteristics concerning the “raw” file are exhibited here too. + Additionally, we see that:

+
    +
  • the delimiter is now tab (U+0009)
  • +
  • metadata is entirely missing from this file, requiring human intervention to combine + the filename token (tmx) with supplementary information in the accompanying + Microsoft Word document to determine the semantics
  • +
+ +

At present, the global surface temperature databank comprises 25 Stage 1 data decks for + monthly temperature observations. These are provided by numerous organisations in + heterogeneous forms. In order to merge these data decks into a single combined dataset, + each data deck has to be converted into a standard form. Columns consist of: station + name, latitude, longitude, altitude, + date, maximum monthly temperature, minimum monthly + temperature, mean monthly temperature plus additional provenance + information.

+ +

An example Stage 2 data file is given for Entebbe, Uganda, below. File = uganda_000000000005_monthly_stage2

+ +
Example 4
{snip}
+ENTEBBE                            0.0500    32.4500  1146.35 193501XX  2783  1711  2247 301/109/101/104/999/999/999/000/000/000/102
+ENTEBBE                            0.0500    32.4500  1146.35 193502XX  2572  1772  2172 301/109/101/104/999/999/999/000/000/000/102
+ENTEBBE                            0.0500    32.4500  1146.35 193503XX  2644  1889  2267 301/109/101/104/999/999/999/000/000/000/102
+ENTEBBE                            0.0500    32.4500  1146.35 193504XX  2572  1817  2194 301/109/101/104/999/999/999/000/000/000/102
+ENTEBBE                            0.0500    32.4500  1146.35 193505XX  2461  1722  2092 301/109/101/104/999/999/999/000/000/000/102
+ENTEBBE                            0.0500    32.4500  1146.35 193506XX  2433  1706  2069 301/109/101/104/999/999/999/000/000/000/102
+ENTEBBE                            0.0500    32.4500  1146.35 193507XX  2489  1628  2058 301/109/101/104/999/999/999/000/000/000/102
+{snip}
+ +

Because of the heterogeneity of the Stage 1 data decks, bespoke data processing programs + were required for each data deck consuming valuable effort and resource in simple data + pre-processing. If the semantics, structure and other supplementary metadata pertinent to + the Stage 1 data decks had been machine readable, then this data homogenisation stage + could have been avoided altogether. Data provenance is crucial to this initiative, + therefore it would be beneficial to be able to associate the supplementary metadata + without needing to edit the original data files.

+ +

+ Requires: + AssociationOfCodeValuesWithExternalDefinitions, + SyntacticTypeDefinition, + SemanticTypeDefinition, + MissingValueDefinition, + NonStandardCellDelimiter and + ZeroEditAdditionOfSupplementaryMetadata. +

+ +

The data pre-processing tools created to parse each Stage 1 data deck into the standard + Stage 2 format and the merge process to create the consolidated Stage 3 data set were + written using the software most familiar to the participating scientists: Fortran 95. The + merge software source code is available online. It is worth noting that this sector of the scientific community also + commonly uses IDL and is + gradually adopting Python as the default software + language choice.

+ +

The resulting merged dataset is published in several formats – including tabular text. + The GHCN-format merged dataset (available from the US National Climatic Data Center's FTP site) comprises several files: merged data and withheld + data (e.g. those data that did not meet the merge criteria) each with an associated + “inventory” file.

+ +

A snippet of the inventory for merged data is provided below; each row describing one of + the 31,427 sites in the dataset. File = merged.monthly.stage3.v1.0.0-beta4.inv

+ +
Example 5
{snip}
+REC41011874   0.0500  32.4500 1155.0 ENTEBBE_AIRPO
+{snip}
+ +

The columns are: station identifier, latitude, + longitude, altitude (m) and station name. The + data is fixed format rather than delimited.

+ +

Similarly, a snippet of the merged data itself is provided. Given that the original + .dat file is a largely unmanageable 422.6 MB in size, a subset is provided. + File = merged.monthly.stage3.v1.0.0-beta4.snip

+ +
Example 6
{snip}
+REC410118741935TAVG 2245    2170    2265    2195    2090    2070    2059    2080    2145    2190    2225    2165
+REC410118741935TMAX 2780    2570    2640    2570    2460    2430    2490    2520    2620    2630    2660    2590
+REC410118741935TMIN 1710    1770    1890    1820    1720    1710    1629    1640    1670    1750    1790    1740
+{snip}
+ +

The columns are: station identifier, year, quantity + kind and the quantity values for months January to December in that year. Again, + the data is fixed format rather than delimited.

+ +

Here we see the station identifier REC41011874 being used as a foreign key + to refer to the observing station details; in this case Entebbe Airport. Once again, there + is no metadata provided within the file to describe how to interpret each of the data + values.

+ +

+ Requires: + ForeignKeyReferences. +

+ +

The resulting merged dataset provides time series of how the observed climate has changed + over a long duration at approximately 32000 locations around the globe. Such instrumental + climate records provide a basis for climate research. However, it is well known that these + climate records are usually affected by inhomogeneities (artificial shifts) due to changes + in the measurement conditions (e.g. relocation, modification or recalibration + of the instrument etc.). As these artificial shifts often have the same magnitude as the + climate signal, such as long-term variations, trends or cycles, a direct analysis of the + raw time-series data can lead to wrong conclusions about climate change.

+

Statistical homogenisation procedures are used to detect and correct these artificial shifts. + Once detected, the raw time-series data is annotated to indicate the presence of artificial + shifts in the data, details of the homogenisation procedure undertaken and, where possible, + the reasons for those shifts.

+ +

+ Requires: + AnnotationAndSupplementaryInfo. +

+ +

Future iterations of the global land surface temperatures databank are anticipated to + include quality controlled (Stage 4) and homogenised (Stage 5) datasets derived from the + merged dataset (Stage 3) outlined above.

+ +
+ +
+

2.4 Use Case #4 - Publication of public sector roles and salaries

+

+ (Contributed by Jeni Tennison) +

+

In line with the + + G8 open data charter Principle 4: Releasing data for improved governance, the + UK Government publishes information about public sector roles and salaries.

+

The collection of this information is managed by the + Cabinet Office and subsequently + published via the UK Government data portal at data.gov.uk.

+

In order to ensure a consistent return from submitting departments and agencies, the + Cabinet Office mandated that each response conform to a data definition schema, which is described within a narrative PDF document. Each submission comprises a + pair of CSV files - one for senior roles and another for junior roles.

+ +

+ Requires: + GroupingOfMultipleTables, + WellFormedCsvCheck and + CsvValidation. +

+ +

The submission for senior roles from the Higher Education Funding Council for England (HEFCE) is provided + below to illustrate. White space has been added for clarity. File = HEFCE_organogram_senior_data_31032011.csv

+ +
Example 7
Post Unique Reference,              Name,Grade,             Job Title,                Job/Team Function,                            Parent Department,                                Organisation,                             Unit,     Contact Phone,         Contact E-mail,Reports to Senior Post,Salary Cost of Reports (£),FTE,Actual Pay Floor (£),Actual Pay Ceiling (£),,Profession,Notes,Valid?
+                90115,        Steve Egan,SCS1A,Deputy Chief Executive,  Finance and Corporate Resources,Department for Business Innovation and Skills,Higher Education Funding Council for England,  Finance and Corporate Resources,     0117 931 7408,     s.egan@hefce.ac.uk,                 90334,                   5883433,  1,              120000,                124999,,   Finance,     ,     1
+                90250,     David Sweeney,SCS1A,              Director,"Research, Innovation and Skills",Department for Business Innovation and Skills,Higher Education Funding Council for England,"Research, Innovation and Skills",     0117 931 7304, d.sweeeney@hefce.ac.uk,                 90334,                   1207171,  1,              110000,                114999,,    Policy,     ,     1
+                90284,       Heather Fry,SCS1A,              Director,      Education and Participation,Department for Business Innovation and Skills,Higher Education Funding Council for England,      Education and Participation,     0117 931 7280,      h.fry@hefce.ac.uk,                 90334,                   1645195,  1,              100000,                104999,,    Policy,     ,     1
+                90334,Sir Alan Langlands, SCS4,       Chief Executive,                  Chief Executive,Department for Business Innovation and Skills,Higher Education Funding Council for England,                            HEFCE,0117 931 7300/7341,a.langlands@hefce.ac.uk,                    xx,                         0,  1,              230000,                234999,,    Policy,     ,     1
+ +

Similarly, a snippet of the junior role submission from HEFCE is provided. Again, + white space has been added for clarity. File = HEFCE_organogram_junior_data_31032011.csv

+ +
Example 8
.                           Parent Department,                                Organisation,                           Unit,Reporting Senior Post,Grade,Payscale Minimum (£),Payscale Maximum (£),Generic Job Title,Number of Posts in FTE,          Profession
+Department for Business Innovation and Skills,Higher Education Funding Council for England,    Education and Participation,                90284,    4,               17426,               20002,    Administrator,                     2,Operational Delivery
+Department for Business Innovation and Skills,Higher Education Funding Council for England,    Education and Participation,                90284,    5,               19546,               22478,    Administrator,                     1,Operational Delivery
+Department for Business Innovation and Skills,Higher Education Funding Council for England,Finance and Corporate Resources,                90115,    4,               17426,               20002,    Administrator,                  8.67,Operational Delivery
+Department for Business Innovation and Skills,Higher Education Funding Council for England,Finance and Corporate Resources,                90115,    5,               19546,               22478,    Administrator,                   0.5,Operational Delivery
+{snip}        
+ +

Key characteristics of the CSV files are:

+
    +
  • single header line
  • +
  • comma delimited cells
  • +
  • double quote escaping of text cells including the delimiter character (comma)
  • +
+ +

Within the senior role CSV the cell Post Unique Reference provides + a primary key within the data file for each row. In addition, it provides a + unique identifier for the entity described within a given row. In order for the + entity to be referenced from outside this datafile, the local identifier + must be mapped to a globally unique identifier such as a URI.

+ +

+ Requires: + PrimaryKey and + URIMapping. +

+ +

This unique identifier is referenced both from within the senior post dataset, + Reports to Senior Post, and within the junior post dataset, Reporting + Senior Post in order to determine the relationships within the organisational + structure.

+ +

+ Requires: + ForeignKeyReferences. +

+ +

For the most senior role in a given organisation, the Reports to Senior Post + cell is expressed as xx denoting that this post does not report to anyone + within the organisation.

+ +

+ Requires: + MissingValueDefinition. +

+ +

The public sector roles and salaries information is published at + data.gov.uk using an interactive "Organogram Viewer" widget implemented using JavaScript. + The HEFCE data can be visualized + + here. For convenience, a screenshot is provided in + Fig. 1 Screenshot of Organogram Viewer web application showing HEFCE data.

+ +
+ data.gov.uk-roles-and-salaries-browser.png +
Fig. 1 Screenshot of Organogram Viewer web application showing HEFCE data
+
+ +

In order to create this visualization, each pair of tabular datasets was transformed + into RDF and uploaded into a triple store exposing a SPARQL end-point which the + interactive widget then queries to acquire the necessary data. An example of the derived RDF + is provided in file + HEFCE_organogram_31032011.rdf.

+ +

The transformation from CSV to RDF required bespoke software, supplementing the content + in the CSV files with additional information such as the RDF properties for each column. + The need to create and maintain bespoke software incurs costs that may be avoided through + use of a generic CSV-to-RDF transformation mechanism.

+ +

+ Requires: + CsvToRdfTransformation. +

+
+ +
+

2.5 Use Case #5 - Publication of property transaction data

+

+ (Contributed by Andy Seaborne) +

+

The Land Registry is the + government department with responsibility to register the ownership of land and property + within England and Wales. Once land or property is entered to the Land Register + any ownership changes, mortgages or leases affecting that land or property are recorded.

+ +

Their + Price paid data, dating from 1995 and consisting of more than 18.5 million records, + tracks the residential property sales in England and Wales that are lodged for registration. + This dataset is one of the most reliable sources of house price information in England and Wales.

+ +

Residential property transaction details are extracted from a data warehouse system + and collated into a tabular dataset for each month. + The current monthly dataset is available online in both .txt and + .csv formats. Snippets of data for January 2014 are provided below. White space + has been added for clarity.

+ +

+ pp-monthly-update.txt (local copy)

+
Example 9
{C6428808-DC2A-4CE7-8576-0000303EF81B},137000,2013-12-13 00:00, "B67 5HE","T","N","F","130","",       "WIGORN ROAD",       "",   "SMETHWICK",            "SANDWELL",       "WEST MIDLANDS","A"
+{16748E59-A596-48A0-B034-00007533B0C1}, 99950,2014-01-03 00:00, "PE3 8QR","T","N","F", "11","",             "RISBY","BRETTON","PETERBOROUGH","CITY OF PETERBOROUGH","CITY OF PETERBOROUGH","A"
+{F10C5B50-92DD-4A69-B7F1-0000C3899733},355000,2013-12-19 00:00,"BH24 1SW","D","N","F", "55","","NORTH POULNER ROAD",       "",    "RINGWOOD",          "NEW FOREST",           "HAMPSHIRE","A"
+{snip}
+ +

+ pp-monthly-update-new-version.csv (local copy)

+
Example 10
"{C6428808-DC2A-4CE7-8576-0000303EF81B}","137000","2013-12-13 00:00", "B67 5HE","T","N","F","130","",       "WIGORN ROAD",       "",   "SMETHWICK",            "SANDWELL",       "WEST MIDLANDS","A"
+"{16748E59-A596-48A0-B034-00007533B0C1}", "99950","2014-01-03 00:00", "PE3 8QR","T","N","F", "11","",             "RISBY","BRETTON","PETERBOROUGH","CITY OF PETERBOROUGH","CITY OF PETERBOROUGH","A"
+"{F10C5B50-92DD-4A69-B7F1-0000C3899733}","355000","2013-12-19 00:00","BH24 1SW","D","N","F", "55","","NORTH POULNER ROAD",       "",    "RINGWOOD",          "NEW FOREST",           "HAMPSHIRE","A"
+{snip}
+ +

There seems to be little difference between the two formats with the exception that all + cells within the .csv file are escaped with a pair of double quotes ("").

+ +

The header row is absent. Information regarding the meaning of each column and the + abbreviations used within the dataset are provided in a complementary + FAQ document. + The column headings are provided below along with some supplemental detail:

+ +
    +
  1. Transaction unique identifier
  2. +
  3. Price - sale price stated on the Transfer deed
  4. +
  5. Date of Transfer - date when the sale was completed, as stated on the Transfer deed
  6. +
  7. Postcode
  8. +
  9. Property Type - D (detached), S (semi-detached), + T (terraced), F (flats/maisonettes)
  10. +
  11. Old/New - Y (newly built property) and + N (established residential building)
  12. +
  13. Duration - relates to tenure; F (freehold) and L (leasehold)
  14. +
  15. PAON - Primary Addressable Object Name
  16. +
  17. SAON - Secondary Addressable Object Name
  18. +
  19. Street
  20. +
  21. Locality
  22. +
  23. Town/City
  24. +
  25. Local Authority
  26. +
  27. County
  28. +
  29. Record status - indicates status of the transaction; A + (addition of a new transaction), C (correction of an existing transaction) + and D (deleted transaction)
  30. +
+ +

+ Requires: + AnnotationAndSupplementaryInfo. +

+ +

Each row, or record, within the tabular dataset describes a property transaction. The + Transaction unique identifier column provides a unique identifier for that + property transaction. Given that transactions may be amended, this identifier cannot + be treated as a primary key for rows within the dataset as the identifier may occur + more than once; it is therefore not the primary key for each record. In order for the + property transaction to be referenced from outside this dataset, the local identifier + must be mapped to a globally unique identifier such as a URI.

+ +

+ Requires: + URIMapping. +

+ +

Each transaction record makes use of predefined category codes as outlined above; e.g. + Duration may be F (freehold) or L (leasehold). Furthermore, + geographic descriptors are commonly used. Whilst there is no attempt to + link these descriptors to specific geographic identifiers, such a linkage is likely + to provide additional utility when aggregating transaction data by location or region for further + analysis. At present there is no standardised mechanism to associate the category codes, + provided as plain text, or geographic identifiers with their authoritative definitions.

+ +

+ Requires: + AssociationOfCodeValuesWithExternalDefinitions. +

+ +

The collated monthly transaction dataset is used as the basis for updating the Land Registry's + information systems; in this case the data is persisted as RDF triples within a triple store. + A SPARQL end-point + and accompanying data definitions are provided + by the Land Registry allowing users to query the content of the triple store.

+ +

In order to update the triple store, the monthly transaction dataset is converted into RDF. The + value of the Record status cell for a given row informs the update process: add, update or + delete. Bespoke software has been created by the Land Registry to perform the transformation from CSV to RDF. + The transformation requires supplementary information not present in the CSV, such as the RDF + properties for each column specified in the + data definitions. The need to create and maintain bespoke software incurs costs that may + be avoided through use of a generic CSV-to-RDF transformation mechanism.

+ +

+ Requires: + CsvToRdfTransformation. +

+ +
Note

The monthly transaction dataset contains in the order of 100,000 records; + any transformation will need to scale accordingly.

+ +

In parallel to providing access via the + SPARQL end-point, the Land Registry also provides aggregated sets of transaction data. Data is + available as a single file containing all transactions since 1995, or partitioned by year. + Given that the complete dataset is approaching 3GB in size, the annual partitions provide a + far more manageable method to download the property transaction data. However, each annual + partition is only a subset of the complete dataset. It is important to be able to both make + assertions about the complete dataset (e.g. publication date, license etc.) and to be + able to understand how an annual partition relates to the complete dataset and other partitions.

+ +

+ Requires: + CsvAsSubsetOfLargerDataset. +

+ +
+
+

2.6 Use Case #6 - Journal Article Solr Search Results

+

+ (Contributed by Alf Eaton) +

+

When performing literature searches researchers need to retain a persisted collection of + journal articles of interest in a local database compiled from on-line publication websites. + In this use case a researcher wants to retain a local personal journal article publication + database based on the search results from Public Library + of Science. PLOS One is a nonprofit open access scientific publishing project aimed at creating + a library of open access journals and other scientific literature under an open content license. +

+

+ In general this use case also illustrates the utility of CSV as a convenient exchange format for pushing + tabular data between software components: +

+
    +
  • making it easier to interpret the data on subsequent ingest +
  • +
  • being able to work with manageable chunks of a tabular data set (e.g. only subsets of the tabular dataset + are ever materialised in a single CSV file, and we often want to know how that subset fits within the larger + whole). +
  • +
+ +

The PLOS website features a Solr index search + engine (Live Search) which can return query results in + XML, + JSON + or in a more concise CSV format. + The output from the CSV Live Search is illustrated below: +

+
Example 11
id,doi,publication_date,title_display,author
+10.1371/journal.pone.0095131,10.1371/journal.pone.0095131,2014-06-05T00:00:00Z,"Genotyping of French <i>Bacillus anthracis</i> Strains Based on 31-Loci Multi Locus VNTR Analysis: Epidemiology, Marker Evaluation, and Update of the Internet Genotype Database","Simon Thierry,Christophe Tourterel,Philippe Le Flèche,Sylviane Derzelle,Neira Dekhil,Christiane Mendy,Cécile Colaneri,Gilles Vergnaud,Nora Madani"
+10.1371/journal.pone.0095156,10.1371/journal.pone.0095156,2014-06-05T00:00:00Z,Pathways Mediating the Interaction between Endothelial Progenitor Cells (EPCs) and Platelets,"Oshrat Raz,Dorit L Lev,Alexander Battler,Eli I Lev"
+10.1371/journal.pone.0095275,10.1371/journal.pone.0095275,2014-06-05T00:00:00Z,Identification of Divergent Protein Domains by Combining HMM-HMM Comparisons and Co-Occurrence Detection,"Amel Ghouila,Isabelle Florent,Fatma Zahra Guerfali,Nicolas Terrapon,Dhafer Laouini,Sadok Ben Yahia,Olivier Gascuel,Laurent Bréhélin"
+10.1371/journal.pone.0096098,10.1371/journal.pone.0096098,2014-06-05T00:00:00Z,Baseline CD4 Cell Counts of Newly Diagnosed HIV Cases in China: 2006–2012,"Houlin Tang,Yurong Mao,Cynthia X Shi,Jing Han,Liyan Wang,Juan Xu,Qianqian Qin,Roger Detels,Zunyou Wu"
+10.1371/journal.pone.0097475,10.1371/journal.pone.0097475,2014-06-05T00:00:00Z,Crystal Structure of the Open State of the <i>Neisseria gonorrhoeae</i> MtrE Outer Membrane Channel,"Hsiang-Ting Lei,Tsung-Han Chou,Chih-Chia Su,Jani Reddy Bolla,Nitin Kumar,Abhijith Radhakrishnan,Feng Long,Jared A Delmar,Sylvia V Do,Kanagalaghatta R Rajashankar,William M Shafer,Edward W Yu"
+

Versions of the search results provided at time of writing are available locally in + XML, + JSON and + CSV formats for reference.

+ +

A significant difference between the CSV formatted results and those of JSON + and XML is the absence of information about how the set of results provided in the HTTP response fit within + the complete set of results that match the Live Search request. The information provided + in the JSON and XML search results states both the total number of "hits" for the Live + Search request and the start index within the complete set (zero for the example provided + here as the ?start={offset} query parameter is absent from the request).

+ +
Note
+

Other common methods of splitting up large datasets into manageable chunks include + partitioning by time (e.g. all the records added to a dataset in a given day may be + exported in a CSV file). Such partitioning allows regular updates to be shared. However, + in order to recombine those time-based partitions into the complete set, one needs to know + the datetime range for which that dataset partition is valid. Such information should be + available within a CSV metadata description.

+
+ +

+ Requires: + CsvAsSubsetOfLargerDataset. +

+ +

+ To be useful to a user maintaining a local publication database, PLOS One search results need to be returned in an organized and + consistent tabular format. This includes:

+
    +
  • mapping search criteria cells to columns returned in the search results +
  • +
  • ordering the columns to match the order of the search criteria cells. +
  • +
+

Lastly because the researcher may use different search criteria the header row plays an important role + later for the researcher wanting to combine multiple literature searches into their database. + The researcher will use the header column names returned in the first row as a way to identify + each column type. +

+

+ Requires: + WellFormedCsvCheck and + CsvValidation. +

+

Search results returned in a tabular format can contain cell values that are organized in data structures + also known as micro formats. In the example above the publication_date and authors list represent two + micro formats that are represented in a recognizable pattern that can be parsed by software or + by the human reader. In the case of the author column, microformats provide the advantage of being + able to store a single author's name or multiple authors' names separated by a comma delimiter. + Because each author cell value is surrounded by quotes a parser can choose to ignore the + data structure or address it.

+

Furthermore, note that the values of the title_display column contain markup. Whilst + these values may be treated as pure text, it provides an example of how structure or + syntax may be embedded within a cell.

+

+ Requires: + CellMicrosyntax and + RepeatedProperties. +

+ +
+ +
+

2.7 Use Case #7 - Reliability Analyses of Police Open Data

+

+ (Contributed by Davide Ceolin) +

+

Several Web sources expose datasets about UK crime statistics. + These datasets vary in format (e.g. maps vs. CSV files), timeliness, aggregation level, etc. + Before being published on the Web, these data are processed to preserve the privacy of the people + involved, but again the processing policy varies from source to source.

+

Every month, the UK Police Home Office publishes (via data.police.uk) CSV files that report crime + counts, aggregated on geographical basis (per address or police neighbourhood) and on type basis. + Before publishing, data are smoothed, that is, grouped in predefined areas and assigned to the + mid point of each area. Each area has to contain a minimum number of physical addresses. The goal + of this procedure is to prevent the reconstruction of the identity of the people involved in the + crimes.

+

Over time, the policies adopted for preprocessing these data have changed, but data previously + published have not been recomputed. Therefore, datasets about different months present relevant + differences in terms of crime types reported and geographical aggregation (e.g. initially, each + geographical area for aggregation had to include at least 12 physical addresses. Later, this + limit was lowered to 8).

+

These policies introduce a controlled error in the data for privacy reasons, but these changes + in the policies imply the fact that different datasets adhere differently to the real data, i.e. + they present different reliability levels. Previous work provided two procedures for measuring + and comparing the reliability of the datasets, but in order to automate and improve these procedures, + it is crucial to understand the meaning of the columns, the relationships between columns, and how the + data rows have been computed.

+

For instance, here is a snippet from a dataset about crime that happened in Hampshire in April 2012:

+
Example 12
Month,	Force,			Neighbourhood,	Burglary,	Robbery,	Vehicle crime,	Violent crime,	Anti-social behaviour,	Other crime
+{snip}
+2011-04	Hampshire Constabulary,	2LE11,		2,		0,		1,		6,		14,			6
+2011-04	Hampshire Constabulary,	2LE10,		1,		0,		2,		4,		15,			6
+2011-04	Hampshire Constabulary,	2LE12,		3,		0,		0,		4,		25,			21
+{snip}
+

and that dataset reports 248 entries, while in October 2012, the number of crime types we can see has increased to 11:

+
Example 13
Month,	Force,			Neighbourhood,	Burglary,	Robbery,	Vehicle crime,	Violent crime,	Anti-social behaviour,	Criminal damage and arson,	Shoplifting,	Other theft,	Drugs,	Public disorder and weapons,	Other crime
+{snip}
+2012-10,Hampshire Constabulary,	2LE11,		1,		0,		1,		2,		8,			0,				0,		1,		1,	0,				1
+2012-10,Hampshire Constabulary,	1SY01,		9,		1,		12,		8,		87,			17,				12,		14,		13,	7,				4
+2012-10,Hampshire Constabulary,	1SY02,		11,		0,		11,		20,		144,			39,				2,		12,		9,	8,				5
+  {snip}
+

This dataset reports 232 entries.

+ +

In order to properly handle the columns, + it is crucial to understand the type of the data contained therein. Given the context, knowing + this information would reveal an important part of the column meaning (e.g. to identify dates). +

+

+ Requires: + SyntacticTypeDefinition. +

+

+ Also, it is important to understand the precise semantics of each column. + This is relevant for two reasons. First, to identify relations between columns (e.g. some crime types + are siblings, while others are less semantically related). Second, to identify semantic relations between + columns in heterogeneous datasets (e.g. a column in one dataset may correspond to the sum of two or more + columns in others). +

+

+ Requires: + SemanticTypeDefinition. +

+

Lastly, datasets with different row numbers are the result of different smoothing procedures. Therefore, it would + be important to trace and access their provenance, in order to facilitate their comparison.

+

+ Requires: + AnnotationAndSupplementaryInfo. +

+
+
+

2.8 Use Case #8 - Analyzing Scientific Spreadsheets

+

+ (Contributed by Alf Eaton, Davide Ceolin, Martine de Vos) +

+

A paper published in Nature Immunology in December 2012 compared changes in expression of a range of genes in response to treatment with two different cytokines. The results were published in the paper as graphic figures, and the raw data was presented in the form of supplementary spreadsheets, as Excel files (local copy).

+ +

Having at disposal both the paper and the results, a scientist may wish to reproduce the experiment, check if the results he obtains coincide with those published, and compare those results with others, provided by different studies about the same issues.

+ +

Because of the size of the datasets and of the complexity of the computations, it could be necessary to perform such analyses and comparisons by means of properly defined software, typically by means of an R, Python or Matlab script. Such software would require as input the data contained in the Excel file. However, it would be difficult to write a parser to extract the information, for the reasons described below.

+ +

To clarify the issues related to the spreadsheet parsing and analysis, we first present an example extrapolated from it. The example below shows a CSV encoding of the original Excel spreadsheet converted using Microsoft Excel 2007. White space has been added to aid clarity. (file = ni.2449-S3.csv)

+ +
Example 14
Supplementary Table 2. Genes more potently regulated by IL-15,,,,,,,,,,,,,,,,,,
+            ,         ,     ,       ,         ,        ,          ,       ,         ,        ,          ,           ,         ,        ,          ,       ,         ,        ,
+   gene_name,   symbol, RPKM,       ,         ,        ,          ,       ,         ,        ,          ,Fold Change,         ,        ,          ,       ,         ,        ,
+            ,         ,     , 4 hour,         ,        ,          ,24 hour,         ,        ,          ,     4 hour,         ,        ,          ,24 hour,         ,        ,
+            ,         , Cont,IL2_1nM,IL2_500nM,IL15_1nM,IL15_500nM,IL2_1nM,IL2_500nM,IL15_1nM,IL15_500nM,    IL2_1nM,IL2_500nM,IL15_1nM,IL15_500nM,IL2_1nM,IL2_500nM,IL15_1nM,IL15_500nM
+NM_001033122,     Cd69,15.67,  46.63,   216.01,   30.71,    445.58,   9.21,    77.32,    4.56,     77.21,       2.98,    13.78,    1.96,     28.44,   0.59,     4.93,    0.29,      4.93
+   NM_026618,   Ccdc56, 9.07,  12.55,     9.25,    5.88,     14.33,  20.08,    20.91,   11.97,     22.69,       1.38,     1.02,    0.65,      1.58,   2.21,     2.31,    1.32,      2.50
+   NM_008637,    Nudt1, 9.31,   7.51,     8.60,   11.21,      6.84,  15.85,    25.14,    7.56,     22.77,       0.81,     0.92,    1.20,      0.73,   1.70,     2.70,    0.81,      2.45
+   NM_008638,   Mthfd2,58.67,  33.99,   245.87,   44.66,    167.87,  55.62,   204.50,   24.52,    176.51,       0.58,     4.19,    0.76,      2.86,   0.95,     3.49,    0.42,      3.01
+   NM_178185,Hist1h2ao, 7.13,  16.52,     7.82,    7.79,     16.99,  75.04,   290.72,   21.99,    164.93,       2.32,     1.10,    1.09,      2.38,  10.52,    40.78,    3.08,     23.13
+{snip}
+ +

As we can see from the example, the table contains several columns of data that are measurements of gene expression in cells after treatment with two concentrations of two cytokines, measured after two periods of time, presented as both actual values and fold change. This can be represented in a table, but needs 3 levels of headings and several merged cells. In fact, the first row is the title of the table, the second to fourth rows are the table headers.

+ +

We also see that the first column gene_name provides a unique identifier for the gene described in each row, with the second column symbol providing a + human readable notation for each gene - albeit a scientific human! It is necessary to determine which column, if any, provides the unique identifier for the entity which + each row describes. In order for the gene to be referenced from outside the datafile, e.g. to reconcile the information in this table with other information about the gene, the local identifier must be mapped to a globally unique identifier such as a URI.

+ +

+ Requires: + MultipleHeadingRows and + URIMapping. +

+ +

The first column contains a GenBank identifier for each gene, with the column name "gene_name". The GenBank identifier provides a local identifier for each gene. This local identifier, e.g. “NM_008638”, can be converted to a fully qualified URI by adding a URI prefix, e.g. “http://www.ncbi.nlm.nih.gov/nuccore/NM_008638” allowing the gene to be uniquely and unambiguously identified.

+ +

The second column contains the standard symbol for each gene, labelled as "symbol". These appear to be HUGO gene nomenclature symbols, but as there's no mapping it's hard to be sure which namespace these symbols are from.

+ +

+ Requires: + URIMapping. +

+ +

As this spreadsheet was published as supplemental data for a journal article, there is little description of what the columns represent, even as text. There is a column labelled as "Cont", which has no description anywhere, but is presumably the background level of expression for each gene.

+ +

+ Requires: + SyntacticTypeDefinition and + SemanticTypeDefinition. +

+ +

Half of the cells represent measurements, but the details of what those measurements are can only be found in the article text. The other half of the cells represent the change in expression over the background level. It is difficult to tell the difference without annotation that describes the relationship between the cells (or understanding of the nested headings). In this particular spreadsheet, only the values are published, and not the formulae that were used to calculate the derived values. The units of each cell are "expression levels relative to the expression level of a constant gene, Rpl7", described in the text of the methods section of the full article.

+ +

+ Requires: + UnitMeasureDefinition. +

+ +

The heading rows contain details of the treatment that each cell received, e.g. "4 hour, IL2_1nM". It would be useful to be able to make this machine readable (i.e. to represent treatment with 1nM IL-2 for 4 hours).

+ +

All the details of the experiment (which cells were used, how they were treated, when they were measured) are described in the methods section of the article. To be able to compare data between multiple experiments, a parser would also need to be able to understand all these parameters that may have affected the outcome of the experiment.

+ +

+ Requires: + AnnotationAndSupplementaryInfo. +

+
+
+

2.9 Use Case #9 - Chemical Imaging

+

+ (Contributed by Mathew Thomas) +

+

Chemical imaging experimental work makes use of CSV formats to record its measurements. In this use case two examples are shown to depict scans from a mass spectrometer and corresponding FTIR corrected files that are saved into a CSV format automatically.

+

Mass Spectrometric Imaging (MSI) allows the generation of 2D ion density maps that help visualize molecules present in sections of tissues and cells. The combination of spatial resolution and mass resolution results in very large and complex data sets. The following is generated using the software Decon Tools, a tool to de-isotope MS spectra and to detect features from MS data using isotopic signatures of expected compounds, available freely at omics.pnnl.gov. The raw files generated by the mass spec instrument are read in and the processed output files are saved as CSV files for each line.

+

Fourier transform (FTIR) spectroscopy is a measurement technique whereby spectra are collected based on measurements of the coherence of a radiative source, using time-domain or space-domain measurements of the electromagnetic radiation or other type of radiation.

+

In general this use case also illustrates the utility of CSV as a means for scientists to collect and process their experimental results:

+
    +
  • making it easier for data to be loaded into a spreadsheet to examine results
  • +
  • being able to edit or select a portion of results to be plotted
  • +
  • making it possible to combine all scans to examine full 2D composite image.
  • +
+

The key characteristics are:

+
    +
  • CSV uses fixed number of cells
  • +
  • First row provides header cell tags, although the FTIR header begins with a comma
  • +
  • All values are comma separated, but they can be delimited by tabs as well.
  • +
  • Because the data is being collected from an instrument some of the columns represent measurement values taken during the experiment.
  • +
  • Left column is typically regarded as the row primary key.
  • +
+

+ Requires: + WellFormedCsvCheck, + CsvValidation , + PrimaryKey and + UnitMeasureDefinition. +

+

Lastly, for Mass Spectrometry multiple CSV files need to be examined to view the sample image in its entirety.

+

+ Requires: + CsvAsSubsetOfLargerDataset . +

+

Below are Mass Spectrometry instrument measurements (3 of 316 CSV rows) for a single line on a sample. It gives the mass-to-charge ranges, peak values, acquisition times and total ion current.

+
Example 15
scan_num,scan_time,type,bpi,bpi_mz,tic,num_peaks,num_deisotoped,info
+1,0,1,4.45E+07,576.27308,1.06E+09,132,0,FTMS + p NSI Full ms [100.00-2000.00]
+2,0.075,1,1.26E+08,576.27306,2.32E+09,86,0,FTMS + p NSI Full ms [100.00-2000.00]
+3,0.1475,1,9.53E+07,576.27328,1.66E+09,102,0,FTMS + p NSI Full ms [100.00-2000.00]
+

Below is example FTIR data. The files from the instrument are baseline corrected, normalized and saved as CSV files automatically. Column 1 represents the wavelength # or range and the remaining columns represent different formations like bound eps (extracellular polymeric substance), loose eps, shewanella etc. Below is an example (5 of 3161 rows):

+
Example 16
,wt beps,wt laeps,so16533 beps,so167333 laeps,so31 beps,so313375 lAPS,so3176345 bEPS,so313376 laEPS,so3193331 bEPS,so3191444 laeps,so3195553beps,so31933333 laeps
+ 1999.82,-0.0681585,-0.04114415,-0.001671781,0.000589855,0.027188073,0.018877371,-0.066532177,-0.016899697,-0.077690018,0.001594551,-0.086573831,-0.08155035
+ 1998.855,-0.0678255,-0.0409804,-0.001622611,0.000552989,0.027188073,0.01890847,-0.066132737,-0.016857071,-0.077346835,0.001733207,-0.086115107,-0.081042424
+ 1997.89,-0.067603,-0.0410459,-0.001647196,0.000423958,0.027238845,0.018955119,-0.065904461,-0.016750515,-0.077101756,0.001733207,-0.085656382,-0.080590934
+ 1996.925,-0.0673255,-0.04114415,-0.001647196,0.000258061,0.027289616,0.018970669,-0.065790412,-0.01664396,-0.076856677,0.001629215,-0.085281062,-0.080365189
+
+ +
+

2.10 Use Case #10 - OpenSpending Data

+

+ (Contributed by Stasinos Konstantopoulos) +

+

The OpenSpending and the Budgit platforms + provide plenty of useful datasets providing figures of national budget and spending of several countries. A journalist wishing to investigate + public spending fallacies can use these data as a basis for his research, and possibly compare them against different sources. + Similarly, a politician that is interested in developing new policies for development can, for instance, combine these data with those from the + World Bank to identify correlations and, possibly, dependencies to leverage. +

+

+ Nevertheless, these uses of these datasets are possibly undermined by the following obstacles. +

+
    +
  • There are whole collections of datasets where a single currency is implied for all amounts given. See, for example, how all + Slovenian Budget Datasets implicitly give amounts in Euros. Given that Slovenia joined the Eurozone in 2007, + the currency in use has changed relatively recently. How do we know if a given table expresses currency amounts in “tolar” or “Euro”?

    +

    In order to be able to compare and combine these data with those provided by other sources like the + World Bank, + in an automatic manner, it would be necessary to explicitly define the currency of each column. Given that + the currency will be uniform for a specific table, the currency metadata may be indicated once for the entire table.

    +

    + Requires: + UnitMeasureDefinition. +

  • +
  • Similar issues are also in the Uganda Budget and Aid to Uganda, 2003-2007 file, + where there are four columns related to the amount. Of these, "amount" (Ugandan Shillings implied) + and "amount_dollars" (USD implied) are mandatory. The value of these columns is implicit, and moreover, as explained in the + complementary information, the Ugandan Shillings amount is computed by converting + the Dollars amount using a ratio determined on year basis (e.g. 2003/4: 1 USD = 1.847 UGX). Since this ratio varies on year basis, + and still corresponds to an approximation of the yearly value of the exchange rate, in order to properly use these data, it would + be preferable to know how these were obtained, or where to find such information. + +

    + Requires: + AssociationOfCodeValuesWithExternalDefinitions and + AnnotationAndSupplementaryInfo. +

    +
  • +
  • + Again in the Uganda Budget and Aid to Uganda, 2003-2007 file, if a row represents a + donation, then the values for the "amount_donor" (the amount in the donor's original currency) and "donorcurrency" (the donor's currency name) columns of + that row are reported. + Otherwise, the corresponding values are set to "0", to indicate that the row does not represent a donation and that the only relevant amounts for that row + are reported in the "amount" and "amount_dollars" column. To make these files machine-understandable, it is necessary to make this coding explicit. +

    + Requires: + MissingValueDefinition. +

    +
  • +
+
Note

The datahub.io platform that collects both OpenSpending and Budgit data allows publishing data in Simple Data Format (SDF), RDF + and other formats providing explicit semantics. Nevertheless, the datasets mentioned above present either implicit semantics and/or additional metadata files provided only + as attachment.

+
+ + + +
+

2.11 Use Case #11 - City of Palo Alto Tree Data

+

+ (Contributed by Eric Stephan) +

+

The City of Palo Alto, + California Urban Forest Section is responsible for maintaining and tracking + the city's public trees and urban forest. In a W3C Data on the Web Best Practices (DWBP) use case + discussion with Jonathan Reichental, City of Palo Alto CIO, he brought to the working group's attention + a Tree Inventory maintained by the city in a + spreadsheet + form using Google Fusion. This use case represents use of tabular data to be representative of + geophysical tree locations + also provided in Google Map form where the user can point and click on trees to look up row + information about the tree. +

+

The example below illustrates the first few rows of data:

+
Example 17
GID,Private,Tree ID,Admin Area,Side of Street,On Street,From Street,To Street,Street_Name,Situs Number,Address Estimated,Lot Side,Serial Number,Tree Site,Species,Trim Cycle,Diameter at Breast Ht,Trunk Count,Height Code,Canopy Width,Trunk Condition,Structure Condition,Crown Condition,Pest Condition,Condition Calced,Condition Rating,Vigor,Cable Presence,Stake Presence,Grow Space,Utility Presence,Distance from Property,Inventory Date,Staff Name,Comments,Zip,City Name,Longitude,Latitude,Protected,Designated,Heritage,Appraised Value,Hardscape,Identifier,Location Feature ID,Install Date,Feature Name,KML,FusionMarkerIcon
+1,True,29,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,203,,Front,,2,Celtis australis,Large Tree Routine Prune,11,1,25-30,15-30,,Good,5,,,Good,2,False,False,Planting Strip,,44,10/18/2010,BK,,,Palo Alto,-122.1565172,37.4409561,False,False,False,,None,40,13872,,"Tree: 29 site 2 at 203 ADDISON AV, on ADDISON AV 44 from pl","<Point><coordinates>-122.156485,37.440963</coordinates></Point>",small_green
+2,True,30,,,EMERSON ST,CHANNING AV,ADDISON AV,ADDISON AV,203,,Left,,1,Liquidambar styraciflua,Large Tree Routine Prune,11,1,50-55,15-30,Good,Good,5,,,Good,2,False,False,Planting Strip,,21,6/2/2010,BK,,,Palo Alto,-122.1567812,37.440951,False,False,False,,None,41,13872,,"Tree: 30 site 1 at 203 ADDISON AV, on EMERSON ST 21 from pl","<Point><coordinates>-122.156749,37.440958</coordinates></Point>",small_green
+3,True,31,,,EMERSON ST,CHANNING AV,ADDISON AV,ADDISON AV,203,,Left,,2,Liquidambar styraciflua,Large Tree Routine Prune,11,1,40-45,15-30,Good,Good,5,,,Good,2,False,False,Planting Strip,,54,6/2/2010,BK,,,Palo Alto,-122.1566921,37.4408948,False,False,False,,Low,42,13872,,"Tree: 31 site 2 at 203 ADDISON AV, on EMERSON ST 54 from pl","<Point><coordinates>-122.156659,37.440902</coordinates></Point>",small_green
+4,True,32,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,209,,Front,,1,Ulmus parvifolia,Large Tree Routine Prune,18,1,35-40,30-45,Good,Good,5,,,Good,2,False,False,Planting Strip,,21,6/2/2010,BK,,,Palo Alto,-122.1564595,37.4410143,False,False,False,,Medium,43,13873,,"Tree: 32 site 1 at 209 ADDISON AV, on ADDISON AV 21 from pl","<Point><coordinates>-122.156427,37.441022</coordinates></Point>",small_green
+5,True,33,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,219,,Front,,1,Eriobotrya japonica,Large Tree Routine Prune,7,1,15-20,0-15,Good,Good,3,,,Good,1,False,False,Planting Strip,,16,6/1/2010,BK,,,Palo Alto,-122.1563676,37.441107,False,False,False,,None,44,13874,,"Tree: 33 site 1 at 219 ADDISON AV, on ADDISON AV 16 from pl","<Point><coordinates>-122.156335,37.441114</coordinates></Point>",small_green
+6,True,34,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,219,,Front,,2,Robinia pseudoacacia,Large Tree Routine Prune,29,1,50-55,30-45,Poor,Poor,5,,,Good,2,False,False,Planting Strip,,33,6/1/2010,BK,cavity or decay; trunk decay; codominant leaders; included bark; large leader or limb decay; previous failure root damage; root decay;  beware of BEES.,,Palo Alto,-122.1563313,37.4411436,False,False,False,,None,45,13874,,"Tree: 34 site 2 at 219 ADDISON AV, on ADDISON AV 33 from pl","<Point><coordinates>-122.156299,37.441151</coordinates></Point>",small_green
+{snip}
+

The complete CSV file of Palo Alto tree data is available locally + - but please note that it is approximately 18MB in size.

+

+ Google Fusion allows a user to download the tree data either from a filtered view or the entire spreadsheet. + The exported spreadsheet is in an organized and consistent tabular format. This includes:

+
    +
  • mapping spreadsheet cells to columns in the CSV file. +
  • +
  • ordering the CSV columns to match the order of the spreadsheet columns. +
  • +
  • The CSV file provides a primary key for each row (column GID), a unique identifier + for each tree (column Tree ID), accounts for missing data, and lists characteristics + describing the condition of the tree in the comments cell using a micro syntax to delimit the + characteristics list. The spreadsheet also provides geo coordinate information pinpointing each + inventoried tree. +
  • +
+

In order for information about a given tree to be reconciled with information about the same + tree originating from other sources, the local identifier for that tree must be mapped to a + globally unique identifier such as a URI.

+

Also note that in row 6, a series of statements describing the condition of the tree and other + important information are provided in the comments cell. These statements are + delimited using the semi-colon ";" character.

+

+ Requires: + WellFormedCsvCheck, + CsvValidation, + PrimaryKey, + URIMapping, + MissingValueDefinition, + UnitMeasureDefinition, + CellMicrosyntax and + RepeatedProperties. +

+
+ +
+

2.12 Use Case #12 - Chemical Structures

+

+ (Contributed by Eric Stephan) +

+

The purpose of this use case is to illustrate how 3-D molecular structures such as the Protein Data Bank and XYZ formats are conveyed in tabular formats. These files may be archived to be used in informatics analysis or as part of an input deck to be used in experimental simulation. Scientific communities rely heavily on tabular formats such as these to conduct their research and share each other's results in platform independent formats.

+ +

The Protein Data Bank (pdb) file format is a tabular file describing the three dimensional structures of molecules held in the Protein Data Bank. The pdb format accordingly provides for description and annotation of protein and nucleic acid structures including atomic coordinates, observed sidechain rotamers, secondary structure assignments, as well as atomic connectivity.

+ +

The XYZ file format is a chemical file format. There is no formal standard and several variations exist, but a typical XYZ format specifies the molecule geometry by giving the number of atoms with Cartesian coordinates that will be read on the first line, a comment on the second, and the lines of atomic coordinates in the following lines.

+ +

In general this use case also illustrates the utility of CSV as a means for scientists to collect and process their experimental results:

+
    +
  • making it easier for data to be loaded into a spreadsheet to examine results
  • +
  • being able to edit or select a portion of results to be plotted
  • +
  • making it possible to combine all scans to examine full 2D composite image.
  • +
+ +

The key characteristics of the XYZ format are:

+
    +
  • CSV contains two header rows, the first row containing the number of atoms in molecule (number of rows in data block). The second is a comment line.
  • +
  • Each row in the data block uses a fixed number of cells (atom name followed by x, y, z coordinates).
  • +
  • All values are delimited by spaces.
  • +
+ +

+ Requires: + WellFormedCsvCheck, + CsvValidation, + MultipleHeadingRows and + UnitMeasureDefinition. +

+ +

Below is a Methane molecular structure organized in an XYZ format.

+ +
Example 18
5
+methane molecule (in angstroms)
+C        0.000000        0.000000        0.000000
+H        0.000000        0.000000        1.089000
+H        1.026719        0.000000       -0.363000
+H       -0.513360       -0.889165       -0.363000
+H       -0.513360        0.889165       -0.363000
+ +

The key characteristics of the PDB format are:

+
    +
  • Each PDB record is self describing and contains different ways to document each protein.
  • +
  • Each row of the file uses a token to depict the purpose of that row.
  • +
  • Tabular data rows vary from a fixed number of columns (e.g. ATOM) to a non-fixed number of columns (e.g. SEQRES) that specify the number of columns in the row.
  • +
  • Because the PDB is a fully contained self describing record it also provides multiple tables to annotate the record. Each table appears to be delimited by a line in the file "...".
  • +
+ +

+ Requires: + GroupingOfMultipleTables. +

+ +

Below is an example PDB file:

+ +
Example 19
HEADER    EXTRACELLULAR MATRIX                    22-JAN-98   1A3I
+TITLE     X-RAY CRYSTALLOGRAPHIC DETERMINATION OF A COLLAGEN-LIKE
+TITLE    2 PEPTIDE WITH THE REPEATING SEQUENCE (PRO-PRO-GLY)
+...
+EXPDTA    X-RAY DIFFRACTION
+AUTHOR    R.Z.KRAMER,L.VITAGLIANO,J.BELLA,R.BERISIO,L.MAZZARELLA,
+AUTHOR   2 B.BRODSKY,A.ZAGARI,H.M.BERMAN
+...
+REMARK 350 BIOMOLECULE: 1
+REMARK 350 APPLY THE FOLLOWING TO CHAINS: A, B, C
+REMARK 350   BIOMT1   1  1.000000  0.000000  0.000000        0.00000
+REMARK 350   BIOMT2   1  0.000000  1.000000  0.000000        0.00000
+...
+SEQRES   1 A    9  PRO PRO GLY PRO PRO GLY PRO PRO GLY
+SEQRES   1 B    6  PRO PRO GLY PRO PRO GLY
+SEQRES   1 C    6  PRO PRO GLY PRO PRO GLY
+...
+ATOM      1  N   PRO A   1       8.316  21.206  21.530  1.00 17.44           N
+ATOM      2  CA  PRO A   1       7.608  20.729  20.336  1.00 17.44           C
+ATOM      3  C   PRO A   1       8.487  20.707  19.092  1.00 17.44           C
+ATOM      4  O   PRO A   1       9.466  21.457  19.005  1.00 17.44           O
+ATOM      5  CB  PRO A   1       6.460  21.723  20.211  1.00 22.26           C
+...
+HETATM  130  C   ACY   401       3.682  22.541  11.236  1.00 21.19           C
+HETATM  131  O   ACY   401       2.807  23.097  10.553  1.00 21.19           O
+HETATM  132  OXT ACY   401       4.306  23.101  12.291  1.00 21.19           O
+
+ +
+

2.13 Use Case #13 - Representing Entities and Facts Extracted From Text

+

+ (Contributed by Tim Finin) +

+ +

The US National Institute of Standards and Technology (NIST) has run various conferences on extracting information from text centered around challenge problems. Participants submit the output of their systems on an evaluation dataset to NIST for scoring, typically in the form of tab-separated format.

+ +

The 2013 NIST Cold Start Knowledge Base Population Task, for example, asks participants to extract facts from text and to represent these as triples along with associated metadata that include provenance and certainty values. A line in the submission format consists of a triple (subject-predicate-object) and, for some predicates, provenance information. Provenance includes a document ID and, depending on the predicate, one or three pairs of string offsets within the document. For predicates that are relations, an optional second set of provenance values can be provided. Each line can also have an optional float as a final column to represent a certainty measure.

+ +

The following lines show examples of possible triples of varying length. In the second line, D00124 is the ID of a document and the strings like 283-286 refer to strings in a document using the offsets of the first and last characters. The final floating point value on some lines is the optional certainty value.

+ +
Example 20
{snip}
+:e4 type         PER
+:e4 mention      "Bart"  D00124 283-286
+:e4 mention      "JoJo"  D00124 145-149 0.9
+:e4 per:siblings :e7     D00124 283-286 173-179 274-281
+:e4 per:age      "10"    D00124 180-181 173-179 182-191 0.9
+:e4 per:parent   :e9     D00124 180-181 381-380 399-406 D00101 220-225 230-233 201-210
+{snip}
+ +

The submission format does not require that each line have the same number of columns. The expected provenance information for a triple depends on the predicate. For example, “type” typically has no provenance, “mention” has a document ID and offset pair, and domain predicates like “per:age” have one or two provenance records each of which has a document ID and three offset pairs.

+ +

The file format exemplified above raises a number of issues, described as follows. Each row is intended to describe an entity (e.g. the subject of the triple, “:e4”). The unique identifier for that entity is provided in the first column. In order for information about this entity to be reconciled with information from other sources about the same entity, the local identifier needs to be mapped to a globally unique identifier such as a URI.

+ +

+ Requires: + URIMapping. +

+ +

After each triple, there is a variable number of annotations representing the provenance of the triple and, occasionally, its certainty. This information has to be properly identified and managed.

+ +

+ Requires: + AnnotationAndSupplementaryInfo. +

+ +

Entities “:e4”, “:e7” and “:e9” appear to be (foreign key) references to other entities described in this or in external tables. Likewise, also the identifiers “D00124” and “D00101” are ambiguous identifiers. It would be useful to identify the resources that these references represent.

+ +

Moreover, “per” appears to be a term from a controlled vocabulary. How do we know which controlled vocabulary it is a member of and what its authoritative definition is?

+ +

+ Requires: + ForeignKeyReferences, + AssociationOfCodeValuesWithExternalDefinitions and + SemanticTypeDefinition. +

+ +

The identifiers used for the entities (“:e4”, “:e7” and “:e9”), as well as those used for the predicates (e.g. “type”, “mention”, “per:siblings” etc.), are ambiguous local identifiers. How can one make the identifier an unambiguous URI? A similar requirement regards the provenance annotations. These are composed of a document identifier (e.g. “D00124”) and page number ranges (e.g. “180-181”). Page number ranges are clearly valid only in the context of the preceding document identifier. The interesting assertion about provenance is the reference (document plus page range). Thus we might want to give the reference a unique identifier comprising the document ID and page range (e.g. D00124#180-181).

+ +

+ Requires: + URIMapping. +

+ +

Besides the entities, the table presents also some values. Some of these are strings (e.g. “10”, “Bart”), some of them are probably floating point values (e.g. “0.9”). It would be useful to have an explicit syntactic type definition for these values.

+ +

+ Requires: + SyntacticTypeDefinition. +

+ +

Entity “:e4” is the subject of many rows, meaning that many rows can be combined to make a composite set of statements about this entity.

+ +

Moreover, a single row in the table comprises a triple (subject-predicate-object), one or more provenance references and an optional certainty measure. The provenance references have been normalised for compactness (e.g. so they fit on a single row). However, each provenance statement has the same target triple so one could unbundle the composite row into multiple simple statements that have a regular number of columns (see the two equivalent examples below).

+ +
Example 21
{snip}
+:e4 per:age      "10"    D00124 180-181 173-179 182-191 0.9
+:e4 per:parent   :e9     D00124 180-181 381-380 399-406 D00101 220-225 230-233 201-210
+{snip}
+
Example 22
{snip}
+:e4 per:age      "10"    D00124 180-181 0.9
+:e4 per:age      "10"    D00124 173-179 0.9
+:e4 per:age      "10"    D00124 182-191 0.9
+:e4 per:parent   :e9     D00124 180-181
+:e4 per:parent   :e9     D00124 381-380
+:e4 per:parent   :e9     D00124 399-406
+:e4 per:parent   :e9     D00101 220-225
+:e4 per:parent   :e9     D00101 230-233
+:e4 per:parent   :e9     D00101 201-210
+{snip}
+ +

+ Requires: + TableNormalization. +

+ +

Lastly, since we already observed that rows comprise triples, that there is a frequent reference to externally defined vocabularies, that values are defined as text (literals), and that triples are also composed by entities, for which we aim to obtain a URI (as described above), it may be useful to be able to convert such a table into RDF.

+ +

+ Requires: + CsvToRdfTransformation. +

+
+ +
+

2.14 Use Case #14 - Displaying Locations of Care Homes on a Map

+

+ (Contributed by Jeni Tennison) +

+ +

NHS Choices makes available a number of (what it calls) CSV files for different aspects of NHS data on its website at http://www.nhs.uk/aboutnhschoices/contactus/pages/freedom-of-information.aspx

+ +

One of the files (file = SCL.csv) contains information about the locations of care homes, as illustrated in the example below:

+ +
Example 23
OrganisationID¬OrganisationCode¬OrganisationType¬SubType¬OrganisationStatus¬IsPimsManaged¬OrganisationName¬Address1¬Address2¬Address3¬City¬County¬Postcode¬Latitude¬Longitude¬ParentODSCode¬ParentName¬Phone¬Email¬Website¬Fax¬LocalAuthority
+220153¬1-303541019¬Care homes and care at home¬UNKNOWN¬Visible¬False¬Bournville House¬Furnace Lane¬Lightmoor Village¬¬Telford¬Shropshire¬TF4 3BY¬0¬0¬1-101653596¬Accord Housing Association Limited¬01952739284¬¬www.accordha.org.uk¬01952588949¬
+220154¬1-378873485¬Care homes and care at home¬UNKNOWN¬Visible¬True¬Ashcroft¬Milestone House¬Wicklewood¬¬Wymondham¬Norfolk¬NR18 9QL¬52.577003479003906¬1.0523598194122314¬1-377665735¬Julian Support Limited¬01953 607340¬ashcroftresidential@juliansupport.org¬http://www.juliansupport.org¬01953 607365¬
+220155¬1-409848410¬Care homes and care at home¬UNKNOWN¬Visible¬False¬Quorndon Care Limited¬34 Bakewell Road¬¬¬Loughborough¬Leicestershire¬LE11 5QY¬52.785675048828125¬-1.219469428062439¬1-101678101¬Quorndon Care Limited¬01509219024¬¬www.quorndoncare.co.uk¬01509413940¬
+{snip}
+ +

The file has two interesting syntactic features:

+
    +
  • the cell separator is the not sign (¬, \u00AC) rather than a comma
  • +
  • no cells are wrapped in double quotes; some cells contain (unescaped) double quotes
  • +
+ +

+ Requires: + WellFormedCsvCheck, + SyntacticTypeDefinition and + NonStandardCellDelimiter. +

+ +

Our user wants to be able to embed a map of these locations easily into her web page using a web component, such that she can use markup like:

+ +
	<emap src="http://media.nhschoices.nhs.uk/data/foi/SCL.csv" latcol="Latitude" longcol="Longitude">
+        
+ +

and see a map similar to that shown at https://github.com/JeniT/nhs-choices/blob/master/SCP.geojson, without converting the CSV file into GeoJSON.

+ +

To make the web component easy to define, there should be a native API on to the data in the CSV file within the browser.

+ +

+ Requires: + CsvToJsonTransformation. +

+ +
+ +
+

2.15 Use Case #15 - Intelligently Previewing CSV files

+

+ (Contributed by Jeni Tennison) +

+

+ All of the data repositories based on the CKAN software, such + as data.gov.uk, data.gov, and many + others, use JSON as the representation of the data when providing a preview of CSV data within a browser. + Server side pre-processing of the CSV files is performed to try and determine column + types, clean the data and transform the CSV-encoded data to JSON in order to provide the preview. JSON has many + features which make it ideal for delivering a preview of the data, originally in CSV format, + to the browser. +

+

+ Javascript is a hard dependency for interacting with data in the browser and as such + JSON was used as the serialization format because it was the most appropriate format for + delivering those data. As the object notation for Javascript, JSON is natively understood + by Javascript; it is therefore possible to use the data without any external dependencies. + The values in the data delivered map directly to common Javascript types and libraries for + processing and generating JSON, with appropriate type conversion, are widely available for + many programming languages. +

+

+ Beyond basic knowledge of how to work with JSON, there is no further burden on the user + to understand complex semantics around how the data should be interpreted. The user of the + data can be assured that the data is correctly encoded as UTF-8 and it is easily queryable + using common patterns used in everyday Javascript. None of the encoding and + serialization flaws with CSV are apparent, although badly structured CSV files will be + mirrored in the JSON. +

+

+ Requires: + WellFormedCsvCheck and + CsvToJsonTransformation. +

+

When providing the in-browser previews of CSV-formatted data, the utility of the preview application + is limited because the server-side processing of the CSV is not always able to determine + the data types (e.g. date-time) associated with data columns. As a result it is not possible + for the in-browser preview to offer functions such as sorting rows by date.

+ +

As an example, see the + Spend over £25,000 in The Royal Wolverhampton Hospitals NHS Trust example. + Note that the underlying data begins with:

+
Example 24
"Expenditure over £25,000- Payment made in January 2014",,,,,,,,
+,,,,,,,,
+Department Family,Entity,Date,Expense Type,Expense Area,Supplier,Transaction Number,Amount in Sterling,
+Department of Health,The Royal Wolverhampton Hospitals NHS Trust RL4,31/01/2014,Capital Project,Capital,STRYKER UK LTD,0001337928,31896.06,
+Department of Health,The Royal Wolverhampton Hospitals NHS Trust RL4,17/01/2014,SERVICE AGREEMENTS,Pathology,ABBOTT LABORATORIES LTD,0001335058,77775.13,
+...
+

A local copy of this dataset is available: file = mth-10-january-2014.csv

+

The header line here comes below an empty row, and there is metadata about the table in the row above the empty row. The preview code manages to + identify the headers from the CSV, and displays the metadata as the value in the first cell of the first row.

+

+ Requires: + MultipleHeadingRows and + AnnotationAndSupplementaryInfo. +

+

It would be good if the preview could recognise that the Date column contains a date and that the Amount in Sterling column contains a number, + so that it could offer options to filter/sort these by date/numerically.

+

+ Requires: + SemanticTypeDefinition, + SyntacticTypeDefinition and + UnitMeasureDefinition. +

+

Moreover, some of the values reported may refer to external definitions (from dictionaries or other sources). It would be useful to know where it is + possible to find such resources, to be able to properly handle and visualize the data, by linking to them.

+

+ Requires: + AssociationOfCodeValuesWithExternalDefinitions. +

+

Lastly, the web page where the CSV is published presents also useful metadata about it. It would be useful to be able to know and access these metadata + even though they are not included in the file.

+

These include:

+
    +
  • Resource title
  • +
  • Publisher
  • +
  • License
  • +
  • Abstract / description
  • +
  • Date last updated
  • +
+

+ Requires: + AnnotationAndSupplementaryInfo. +

+ +
+
+

2.16 Use Case #16 - Tabular Representations of NetCDF data Using CDL Syntax

+

+ (Contributed by Eric Stephan) +

+

NetCDF is a set of binary data formats, programming interfaces, and software libraries that help read and write scientific data files. + NetCDF provides scientists a means to share measured or simulated experiments with one another across the web. What makes + NetCDF useful is its ability to be self describing and provide a means for scientists to rely on an existing data model + as opposed to needing to write their own. The classic NetCDF data model consists of variables, dimensions, and attributes. + This way of thinking about data was introduced with the very first NetCDF release, and is still the core of all NetCDF files. +

+

Among the tools available to the NetCDF community are two tools: ncdump and ncgen. The ncdump tool is used + by scientists wanting to inspect variables and attributes (metadata) contained in the NetCDF file. It also + can provide a full text extraction of data including blocks of tabular data represented by variables. + While NetCDF files are typically written by a software client, it is possible to generate NetCDF files using + ncgen and ncgen3 from a text format. The ncgen tool parses the text file and stores it in a binary format. +

+

Both ncdump and ncgen rely on a text format to represent the NetCDF file called network Common Data form + Language (CDL). The CDL syntax as shown below contains annotation along with blocks of data denoted by the + "data:" key. For the results to be legible for visual inspection the measurement data is written as delimited + blocks of scalar values. As shown in the example below CDL supports multiple variables or blocks of data. + The blocks of data while delimited need to be thought of as a vector or single column of tabular data + wrapped around to the next line in a similar way that characters can be wrapped around in a single cell block + of a spreadsheet to make the spreadsheet more visually appealing to the user. +

+
Example 25
netcdf foo {    // example NetCDF specification in CDL
+
+dimensions:
+lat = 10, lon = 5, time = unlimited;
+
+variables:
+  int     lat(lat), lon(lon), time(time);
+  float   z(time,lat,lon), t(time,lat,lon);
+  double  p(time,lat,lon);
+  int     rh(time,lat,lon);
+
+  lat:units = "degrees_north";
+  lon:units = "degrees_east";
+  time:units = "seconds";
+  z:units = "meters";
+  z:valid_range = 0., 5000.;
+  p:_FillValue = -9999.;
+  rh:_FillValue = -1;
+
+data:
+  lat   = 0, 10, 20, 30, 40, 50, 60, 70, 80, 90;
+  lon   = -140, -118, -96, -84, -52;
+}
+ +

+ The next example shows a small subset of data block taken from an actual NetCDF file. + The blocks of data while delimited need to be thought of as a vector or single column + of tabular data wrapped around to the next line in a similar way that characters can be + wrapped around in a single cell block of a spreadsheet to make the spreadsheet more + visually appealing to the user. +

+
Example 26
data:
+
+ base_time = 1020770640 ;
+
+ time_offset = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
+    34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68,
+    70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102,
+    104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130,
+    132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158,
+    160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186,
+    188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214,
+    216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242,
+    244, 246, 248, 250, 252, 254, 256, 258, 260, 262, 264, 266, 268, 270,
+    272, 274, 276, 278, 280, 282, 284, 286, 288, 290, 292, 294, 296, 298,
+    300, 302, 304, 306, 308, 310, 312, 314, 316, 318, 320, 322, 324, 326,
+    328, 330, 332, 334, 336, 338, 340, 342, 344, 346, 348, 350, 352, 354,
+    356, 358, 360, 362, 364, 366, 368, 370, 372, 374, 376, 378, 380, 382,
+    384, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 408, 410,
+    412, 414, 416, 418, 420, 422, 424, 426, 428, 430, 432, 434, 436, 438,
+    440, 442, 444, 446, 448, 450, 452, 454, 456, 458, 460, 462, 464, 466,
+    468, 470, 472, 474, 476, 478, 480, 482, 484, 486, 488, 490, 492, 494,
+    496, 498, 500, 502, 504, 506, 508, 510, 512, 514, 516, 518, 520, 522;
+ +

+ The format allows for error codes and missing values to be included. +

+

+ Requires: + WellFormedCsvCheck, + CsvValidation, + UnitMeasureDefinition, + MissingValueDefinition and + GroupingOfMultipleTables. +

+

+ Lastly, NetCDF files are typically collected together in larger datasets + where they can be analyzed, so the CSV data can be thought of as a subset + of a larger dataset. +

+ Requires: + CsvAsSubsetOfLargerDataset and + AnnotationAndSupplementaryInfo. +

+ + +
+
+

2.17 Use Case #17 - Canonical mapping of CSV

+

+ (Contributed by David Booth and Jeremy Tandy) +

+ +

CSV is by far the commonest format within which open data is published, and is thus + typical of the data that application developers need to work with.

+ +

However, an object / object graph serialisation (of open data) is easier to consume within + software applications. For example, web applications (using HTML5 & Javascript) require + no extra libraries to work with data in JSON format. Similarly, RDF-encoded data from + multiple sources can be simply combined or merged using SPARQL queries once persisted + within a triple store.

+ +

The + UK Government policy paper "Open Data: unleashing the potential" outlines a + + set of principles for publishing open data. Within this document, principle 9 states:

+ +

+ Release data quickly, and then work to make sure that it is available in open standard + formats, including linked data formats. +

+ +

The open data principles recognise how the additional utility to be gained from publishing in + linked data formats must be balanced against the additional effort incurred by the + data publisher to do so and the resulting delay to publication of the data. Data publishers + are required to release data quickly - which means making the data available in + a format convenient for them such as CSV dumps from databases or spread sheets.

+ +

One of the hindrances to publishing in linked data formats is the difficulty in + determining the ontology or vocabulary (e.g. the classes, predicates, namespaces and + other usage patterns) that should be used to describe the data. Whilst it is + only reasonable to assume that a data publisher best knows the intended meaning of their + data, they cannot be expected to determine the ontology or vocabulary most applicable + to a consuming application!

+ +

Furthermore, in lieu of agreed de facto standard vocabularies or ontologies for a given + application domain, it is highly likely that disparate applications will conform to different data + models. How should the data publisher choose which of the available vocabularies or + ontologies to use when publishing (if indeed they are aware of those applications at all)!

+ +

In order to assist data publishers provide data in linked data formats without + the need to determine ontologies or vocabularies, it is necessary to separate the syntactic + mapping (e.g. changing format from CSV to JSON) from the semantic mapping + (e.g. defining the transformations required to achieve semantic alignment with a target + data model).

+ +

As a result of such separation, it will be possible to establish a canonical + transformation from CSV conforming to the + core tabular data model [tabular-data-model] + to an object graph serialisation such as JSON.

+ +

+ Requires: + WellFormedCsvCheck, + CsvToJsonTransformation and + CanonicalMappingInLieuOfAnnotation. +

+ +

This use case assumes that JSON is the target serialisation for application developers + given the general utility of that format. However, by considering JSON-LD [json-ld], it becomes + trivial to map CSV-encoded tabular data via JSON into a canonical RDF model. In doing so + this enables CSV-encoded tabular data to be published in linked data formats + as required in the open data principle 9 at no extra effort to the data publisher as + standard mechanisms are available for a data user to transform the data from CSV to RDF.

+ +

+ Requires: + CsvToRdfTransformation. +

+ +

In addition, open data principle 14 requires that:

+ +

+ Public bodies should publish relevant metadata about their datasets […]; and they + should publish supporting descriptions of the format, provenance and meaning of the data. +

+ +

To achieve this, data publishers need to be able to publish supplementary metadata concerning + their tabular datasets, such as title, usage license and description.

+ +

+ Requires: + AnnotationAndSupplementaryInfo. +

+ +

Applications may automatically determine the data type (e.g. date-time, number) associated + with cells in a CSV file by parsing the data values. However, on occasion, this is prone to + mistakes where data appears to resemble something else. This is especially + prevalent for dates. For example, 1/4 is often confused with 1 April + rather than 0.25. In such situations, it is beneficial if guidance can be given to the + transformation process indicating the data type for given columns.

+ +

+ Requires: + SyntacticTypeDefinition. +

+ +

Provision of CSV data coupled with a canonical mapping provides significant utility by itself. However, + there is nothing stopping a data publisher from adding annotation defining data semantics once, say, + an appropriate de facto standard vocabulary has been agreed within the community of use. Similarly, a + data consumer may wish to work directly with the canonical mapping and wish to ignore any semantic + annotations provided by the publisher.

+ +
+
+

2.18 Use Case #18 - Supporting Semantic-based Recommendations

+

+ (Contributed by Davide Ceolin and Valentina Maccatrozzo) +

+

In the ESWC-14 Challenge: Linked Open Data-enabled Recommender Systems, + participants are provided with a series of datasets about books in TSV format.

+

A first dataset contains a set of user identifiers and their ratings for a bunch of books each. Each book is represented by means of a numeric identifier.

+
Example 27
DBbook_userID,	DBbook_itemID,	rate
+{snip}
+6873,		5950,		1
+6873,		8010,		1
+6873,		5232,		1
+{snip}
+

Ratings can be boolean (0,1) or Likert scale values (from 1 to 5), depending on the challenge task considered.

+

+ Requires: + SyntacticTypeDefinition, + SemanticTypeDefinition and + NonStandardCellDelimiter. +

+ +

A second file provides a mapping between book ids and their names and dbpedia URIs:

+ +
Example 28
DBbook_ItemID	name				DBpedia_uri
+{snip}
+1		Dragonfly in Amber		http://dbpedia.org/resource/Dragonfly_in_Amber
+10		Unicorn Variations		http://dbpedia.org/resource/Unicorn_Variations
+100		A Stranger in the Mirror	http://dbpedia.org/resource/A_Stranger_in_the_Mirror
+1000		At All Costs			http://dbpedia.org/resource/At_All_Costs
+{snip}
+

+ Requires: + ForeignKeyReferences. +

+ +

Participants are requested to estimate the ratings or relevance scores (depending on the task) that users would + attribute to a set of books reported in an evaluation dataset:

+ +
Example 29
DBbook_userID	DBbook_itemID
+{snip}
+6873		5946
+6873		5229
+6873		3151
+{snip}
+

+ Requires: + AssociationOfCodeValuesWithExternalDefinitions. +

+

The challenge mandates the use of Linked Open Data resources in the recommendations.

+

An effective manner to satisfy this requirement is to make use of undirected semantic paths. + An undirected semantic path is a sequence of entities (subject or object) and properties that link two items, for instance:

+ +
	{Book1 property1 Object1 property2 Book2}
+	
+ +

This sequence results from considering the triples (subject-predicate-object) in a given Linked Open Data resource (e.g. DBpedia), + independently of their direction, such that the starting and the ending entities are the desired items and that the subject (or object) of + a triple is the object (or subject) of the following triple. + For example, the sequence above may result from the following triples:

+ +
	Book1 property1 Object1
+	Book2 property1 Object1
+	
+ +

+ Undirected semantic paths are classified according to their length. Given a fixed length, one can extract all the undirected semantic paths of that length + that link two items within a Linked Open Data resource by running a set of SPARQL queries. + This is necessary because an undirected semantic path actually corresponds to the union of a set of directed semantic paths. In the source, data are stored + in terms of directed triples (subject-predicate-object). +

+

+ The number of queries that is necessary to run in order to obtain all the undirected semantic paths that link two items is exponential in the + length of the path itself ($2^n$). Because of the complexity of this task and of the possible latency times deriving from + it, it might be useful to cache these results. +

+ +

CSV is a good candidate for caching undirected semantic paths, because of its ease of use, sharing, reuse. However, there are some open issues + related to this. + First, since paths may present a variable number of components, one might want to represent paths in a single cell, + while being able to separate the path elements when necessary. +

+

For example, in this file, undirected semantic paths are grouped by means + of double quotes, and path components are separated by commas. The starting and ending elements of the undirected semantic paths (Book1 and Book2) are represented + in two separate columns by means of the book identifiers used in the challenge (see the example below). +

+ +
Example 30
Book1	Book2	Path
+{snip}
+1	7680	"http://dbpedia.org/ontology/language,http://dbpedia.org/resource/English_language,http://dbpedia.org/ontology/language"
+1	2	"http://dbpedia.org/ontology/author,http://dbpedia.org/resource/Diana_Gabaldon,http://dbpedia.org/ontology/author"
+1	2	"http://dbpedia.org/ontology/country,http://dbpedia.org/resource/United_States,http://dbpedia.org/ontology/country"
+{snip}
+

+ Requires: + CellMicrosyntax and + RepeatedProperties. +

+

+ Second, the size of these caching files may be remarkable. For example, the size of this file described above is ~2GB, and that may imply prohibitive + loading times, especially when making a limited number of recommendations.

+

Since rows are sorted according to the starting and the ending book of the undirected semantic path, then all the undirected semantic paths that link two books + are present in a region of the table formed by consecutive rows.

+

By having at our disposal an annotation of such regions indicating which book they describe, one might be able to select the "slice" of + the file he needs to make a recommendation, without having to load it entirely.

+

+ Requires: + AnnotationAndSupplementaryInfo and + RandomAccess. +

+
+
+

2.19 Use Case #19 - Supporting Right to Left (RTL) Directionality

+

+ (Contributed by Yakov Shafranovich) +

+

Writing systems affect the way in which information is displayed. In some cases, these writing + systems affect the order in which characters are displayed. Latin based languages display text + left-to-right across a page (LTR). Languages such as Arabic and Hebrew are written in scripts + whose dominant direction is right to left (RTL) when displayed, however when it involves + non-native text or numbers it is actually bidirectional.

+

Irrespective of the LTR or RTL display of characters in a given language, data is serialised + such that the bytes are ordered in one sequential order.

+ +

Content published in Hebrew and Arabic provide examples of RTL display behaviour.

+ +
Note

Tabular data originating from countries where vertical writing is the norm + (e.g. China, Japan) appear to be published with rows and columns as defined in [RFC4180] (e.g. + each horizontal line in the data file conveys a row of data, with the first line optionally + providing a header with column names). Rows are published in the left to right topology.

+ +

The results from the Egyptian Referendum of 2012 + illustrate the problem, as can be seen in Fig. 2 Snippet of web page displaying Egyptian Referendum results (2012).

+ +
+ egypt-referendum-2012-result-web-page-snip.PNG +
Fig. 2 Snippet of web page displaying Egyptian Referendum results (2012)
+
+ +

The content in the + CSV data file + is serialised in the order as illustrated below (assuming LTR rendering):

+ +
Example 31

+‌ا‌ل‌م‌ح‌ا‌ف‌ظ‌ة‌,‌ن‌س‌ب‌ة‌ ‌م‌و‌ا‌ف‌ق‌,‌ن‌س‌ب‌ة‌ ‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌,‌ع‌د‌د‌ ‌ا‌ل‌ن‌ا‌خ‌ب‌ي‌ن‌,‌ا‌ل‌أ‌ص‌و‌ا‌ت‌ ‌ا‌ل‌ص‌ح‌ي‌ح‌ة‌,‌ا‌ل‌أ‌ص‌و‌ا‌ت‌ ‌ا‌ل‌ب‌ا‌ط‌ل‌ة‌,‌ن‌س‌ب‌ة‌ ‌ا‌ل‌م‌ش‌ا‌ر‌ك‌ة‌,‌م‌و‌ا‌ف‌ق‌,‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌
+‌ا‌ل‌ق‌ل‌ي‌و‌ب‌ي‌ة‌,60.0,40.0,"2,639,808","853,125","15,224",32.9,"512,055","341,070"
+‌ا‌ل‌ج‌ي‌ز‌ة‌,66.7,33.3,"4,383,701","1,493,092","24,105",34.6,"995,417","497,675"
+‌ا‌ل‌ق‌ا‌ه‌ر‌ة‌,43.2,56.8,"6,580,478","2,254,698","36,342",34.8,"974,371","1,280,327"
+‌ق‌ن‌ا‌,84.5,15.5,"1,629,713","364,509","6,743",22.8,"307,839","56,670"
+{snip}
+
+ +

A copy of the referendum results data file is also available locally.

+ +
Note

+ Readers should be aware that both the right-to-left text direction and the cursive nature of Arabic text + have been explicitly overridden in the example above in order to display each individual character in + sequential left-to-right order. +

+ +

The directionality of the content as displayed does not affect the logical structure of the tabular data; + i.e. the cell at index zero is followed by the cell at index 1, and then index 2 etc.

+ +

However, without awareness of the directionality of the content, an application may display data in + a way that is unintuitive for an RTL reader. For example, viewing the CSV file using + Libre Office Calc (tested + using version 3 configured with English (UK) locale) demonstrates the challenge in rendering the content correctly. + Fig. 3 CSV data file containing Egyptian Referendum results (2012) displayed in Libre Office Calc shows how the + content is + incorrectly rendered; cells progress from left-to-right yet, on the positive side, the Arabic text + within a given field runs from right-to-left. Similar behaviour is observed in Microsoft Office Excel 2007.

+ +
+ egypt-referendum-2012-result-csv-in-libre-office-3.png +
Fig. 3 CSV data file containing Egyptian Referendum results (2012) displayed in Libre Office Calc
+
+ +

By contrast, we can see Fig. 4 CSV data file containing Egyptian Referendum results (2012) displayed in TextWrangler. The simple + TextWrangler text editor is not aware that the overall + direction is right-to-left, but does apply the Unicode bidirectional algorithm such that lines starting with an + Arabic character have a direction base of right-to-left. However, as a result, the numeric digits are also displayed + right to left, which is incorrect.

+ +
+ egypt-referendum-2012-result-csv-in-textwrangler.png +
Fig. 4 CSV data file containing Egyptian Referendum results (2012) displayed in TextWrangler
+
+ +

It is clear that a mechanism needs to be provided such that one can explicitly declare the directionality + which applies when parsing and rendering the content of CSV files.

+ +
Note
+

From Unicode version 6.3 onwards, the Unicode Standard contains new control codes (RLI, LRI, FSI, PDI) to enable authors to express isolation at the same time as direction in inline bidirectional text. The Unicode Consortium recommends that isolation be used as the default for all future inline bidirectional text embeddings. To use these new control codes, however, it will be necessary to wait until the browsers support them. The new control codes are:

+
    +
  • RLI (RIGHT-TO-LEFT ISOLATE) U+2067 to set direction right-to-left
  • +
  • LRI (LEFT-TO-RIGHT ISOLATE) U+2066 to set direction left-to-right
  • +
  • FSI (FIRST STRONG ISOLATE) U+2068 to set direction according to the first strong character
  • +
  • PDI (POP DIRECTIONAL ISOLATE) U+2069 to terminate the range set by RLI, LRI or FSI
  • +
+

More information on setting the directionality of text without markup can be found here

+
+ +

Requires: + RightToLeftCsvDeclaration. +

+ +
+ + +
+

2.20 Use Case #20 - Integrating components with the TIBCO Spotfire platform using tabular data

+

+ (Contributed by Yakov Shafranovich) +

+

A systems integrator seeks to integrate a new component into the + TIBCO Spotfire analytics platform. + Reviewing the documentation that describes how to extend the platform indicates + that Spotfire employs a common tabular file format for all products: the + Spotfire Text Data Format + (STDF).

+

The example from the STDF documentation (below) illustrates a number of the key + differences with the standard CSV format defined in [RFC4180].

+
Example 32
<bom>\! filetype=Spotfire.DataFormat.Text; version=1.0;
+\* ich bin ein berliner
+Column A;Column #14B;Kolonn Ö;The n:th column;
+Real;String;Blob;Date;
+-123.45;i think there\r\nshall never be;\#aaXzD;2004-06-18;
+1.0E-14;a poem\r\nlovely as a tree;\#ADB12=;\?lost in time;
+222.2;\?invalid text;\?;2004-06-19;
+\?error11;\\förstår ej\\;\#aXzCV==;\?1979;
+3.14;hej å hå\seller?;\?NIL;\?#ERROR;
+ +
    +
  • +

    The first line of the STDF file includes a + byte order mark (BOM), the + character sequence "\!" and metadata about the file type and version + to inform consuming applications.

    +

    Requires: + AnnotationAndSupplementaryInfo.

    +
  • +
  • +

    The second line is a comment line which is ignored during processing. + The comment is recognised from the initial sequence of characters within the line: + "\*".

    +

    Requires: + CommentLines.

    +
  • +
  • +

    Lines three and four provide metadata: column heading names and the data types + (including integer, real, string, date, time, datetime and blob) for each column + respectively.

    +

    Requires: + MultipleHeadingRows and + SyntacticTypeDefinition.

    +
  • +
  • +

    Cells are delimited using the semi-colon ";" character.

    +

    Requires: + NonStandardCellDelimiter.

    +
  • +
  • +

    Date and time values are strictly formatted; YYYY-MM-DD and + HH:MM:SS respectively.

    +

    Requires: + CellMicrosyntax.

    +
  • +
  • +

    Base64-encoded binary values may be included. These are designated by setting + the initial cell value to "\#".

    +
  • +
  • +

    A number of escape sequences for special characters are supported; e.g. + "\\" (backslash within a string), + "\s" (semicolon within a string - not a cell or list item delimiter), + "\n" (newline within a string) and + "\t" (tab within a string) etc.

    +

    These special characters don't affect the parsing of the data but are further + examples of the use of microsyntax within cells.

    +

    Requires: + CellMicrosyntax.

    +
  • +
  • +

    Null and invalid values are indicated by setting the initial character sequence of a cell to "\?". Optionally, an error code or other informative statement may follow.

    +

    + Requires: + MissingValueDefinition and + CellMicrosyntax. +

    +
  • +
+ +

Although not shown in this example, STDF also supports list types:

+
    +
  • A valid list value must begin with "\[" and end with "\]" followed by a terminating semicolon.
  • +
  • All list items are terminated by a semicolon, including the last item in a list.
  • +
+ +

+ Requires: + CellMicrosyntax. +

+
+ +
+

2.21 Use Case #21 - Publication of Biodiversity Information from GBIF using the Darwin Core Archive Standard

+

+ (Contributed by Tim Robertson, GBIF, and Jeremy Tandy) +

+ +

A citizen scientist investigating biodiversity in the Parque Nacional de Sierra Nevada, + Spain, aims to create a compelling web application that combines biodiversity + information with other environmental factors - displaying this information on a map and + as summary statistics.

+ +

The Global Biodiversity Information Facility (GBIF), + a government funded open data initiative that spans over 600 institutions worldwide, has + mobilised more than 435 million records + describing the occurrence of flora and fauna.

+ +

Included in their data holdings is + "Sinfonevada: Dataset of Floristic diversity in Sierra Nevada forest (SE Spain)", + containing around 8000 records belonging to 270 taxa collected between January 2004 and + December 2005.

+ +

As with the majority of datasets published via GBIF, the Sinfonevada dataset is available + in the Darwin Core Archive + format (DwC-A).

+ +

In accordance with the DwC-A specification, the Sinfonevada dataset is packaged as a zip + file containing:

+
    +
  • tab delimited tabular data file: occurrence.txt
  • +
  • metadata describing that tabular data file: meta.xml
  • +
  • supplementary dataset metadata: eml.xml
  • +
+ +

The metadata file included in the zip package must always be named meta.xml, + whilst the tabular data file and supplementary metadata are explicitly identified within the + main metadata file.

+ +

A copy of the zip package is provided for reference. Snippets of + the tab delimited tabular data file and the full metadata file "meta.xml" are provided below.

+ +
Example 33
"occurrence.txt"
+----------------
+
+id	modified	institutionCode	collectionCode	basisOfRecord	catalogNumber	eventDate	fieldNumber	continent	countryCode	stateProvince	county	locality	minimumElevationInMeters	maximumElevationInMeters	decimalLatitude	decimalLongitude	coordinateUncertaintyInMeters	scientificName	kingdom	phylum	class	order	family	genus	specificEpithet	infraspecificEpithet	scientificNameAuthorship
+OBSNEV:SINFONEVADA:SINFON-100-005717-20040930	2013-06-20T11:18:18	OBSNEV	SINFONEVADA	HumanObservation	SINFON-100-005717-20040930	2004-09-30 & 2004-09-30		Europe	ESP	GR	ALDEIRE		1992	1992	37.12724018	-3.116135071	1	Pinus sylvestris Lour.	Plantae	Pinophyta	Pinopsida	Pinales	Pinaceae	Pinus	sylvestris		Lour.
+OBSNEV:SINFONEVADA:SINFON-100-005966-20040930	2013-06-20T11:18:18	OBSNEV	SINFONEVADA	HumanObservation	SINFON-100-005966-20040930	2004-09-30 & 2004-09-30		Europe	ESP	GR	ALDEIRE		1992	1992	37.12724018	-3.116135071	1	Berberis hispanica Boiss. & Reut.	Plantae	Magnoliophyta	Magnoliopsida	Ranunculales	Berberidaceae	Berberis	hispanica		Boiss. & Reut.
+OBSNEV:SINFONEVADA:SINFON-100-008211-20040930	2013-06-20T11:18:18	OBSNEV	SINFONEVADA	HumanObservation	SINFON-100-008211-20040930	2004-09-30 & 2004-09-30		Europe	ESP	GR	ALDEIRE		1992	1992	37.12724018	-3.116135071	1	Genista versicolor Boiss. ex Steud.	Plantae	Magnoliophyta	Magnoliopsida	Fabales	Fabaceae	Genista	versicolor		Boiss. ex Steud.
+{snip}
+ +

The key variances of this tabular data file from RFC 4180 are the use of TAB + %x09 as the cell delimiter and LF %x0A as the row + terminator.

+ +

Also note the use of two adjacent TAB characters to indicate an empty cell.

+ +
Example 34
"meta.xml"
+----------
+
+<archive xmlns="http://rs.tdwg.org/dwc/text/" metadata="eml.xml">
+  <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
+    <files>
+      <location>occurrence.txt</location>
+    </files>
+    <id index="0" />
+    <field index="1" term="http://purl.org/dc/terms/modified"/>
+    <field index="2" term="http://rs.tdwg.org/dwc/terms/institutionCode"/>
+    <field index="3" term="http://rs.tdwg.org/dwc/terms/collectionCode"/>
+    <field index="4" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
+    <field index="5" term="http://rs.tdwg.org/dwc/terms/catalogNumber"/>
+    <field index="6" term="http://rs.tdwg.org/dwc/terms/eventDate"/>
+    <field index="7" term="http://rs.tdwg.org/dwc/terms/fieldNumber"/>
+    <field index="8" term="http://rs.tdwg.org/dwc/terms/continent"/>
+    <field index="9" term="http://rs.tdwg.org/dwc/terms/countryCode"/>
+    <field index="10" term="http://rs.tdwg.org/dwc/terms/stateProvince"/>
+    <field index="11" term="http://rs.tdwg.org/dwc/terms/county"/>
+    <field index="12" term="http://rs.tdwg.org/dwc/terms/locality"/>
+    <field index="13" term="http://rs.tdwg.org/dwc/terms/minimumElevationInMeters"/>
+    <field index="14" term="http://rs.tdwg.org/dwc/terms/maximumElevationInMeters"/>
+    <field index="15" term="http://rs.tdwg.org/dwc/terms/decimalLatitude"/>
+    <field index="16" term="http://rs.tdwg.org/dwc/terms/decimalLongitude"/>
+    <field index="17" term="http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters"/>
+    <field index="18" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
+    <field index="19" term="http://rs.tdwg.org/dwc/terms/kingdom"/>
+    <field index="20" term="http://rs.tdwg.org/dwc/terms/phylum"/>
+    <field index="21" term="http://rs.tdwg.org/dwc/terms/class"/>
+    <field index="22" term="http://rs.tdwg.org/dwc/terms/order"/>
+    <field index="23" term="http://rs.tdwg.org/dwc/terms/family"/>
+    <field index="24" term="http://rs.tdwg.org/dwc/terms/genus"/>
+    <field index="25" term="http://rs.tdwg.org/dwc/terms/specificEpithet"/>
+    <field index="26" term="http://rs.tdwg.org/dwc/terms/infraspecificEpithet"/>
+    <field index="27" term="http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"/>
+  </core>
+</archive>
+ +

The metadata file specifies:

+
    +
  • a link to the supplementary metadata file eml.xml
  • +
  • the data file encoding UTF-8
  • +
  • cell delimiter
  • +
  • row terminator
  • +
  • cell escaping
  • +
  • the number of rows to skip at the beginning of the file before the data section (e.g. the length of the header section)
  • +
  • the type of entity that each row in the tabular dataset describes
  • +
  • the name of the tabular data file occurrence.txt
  • +
  • the column which provides the unique identifier for the entity described by each row
  • +
  • the property type associated with each column based on the column number index
  • +
+ +

Requires: + NonStandardCellDelimiter, + ZeroEditAdditionOfSupplementaryMetadata and + AnnotationAndSupplementaryInfo.

+ +

The ignoreHeaderLines attribute can be used to ignore files with column + headings or preamble comments.

+ +

In this particular case, the tabular data file is packaged within the zip file, and + is referenced locally. However, the DwC-A specification also supports annotation of remote + tabular data files, and thus does not require any modification of the source datafiles + themselves.

+ +

Requires: + LinkFromMetadataToData and + IndependentMetadataPublication.

+ +

Although not present in this example, DwC-A also supports the ability to specify + a property-value pair that is applied to every row in the tabular data file, or, + in the case of sparse data, for that property-value pair to be added where the property + is absent from the data file (e.g. providing a default value for a property).

+ +

Requires: + SpecificationOfPropertyValuePairForEachRow.

+ +

Future releases of DwC-A also seek to provide stronger typing of data formats; + at present only date formats are validated.

+ +

Requires: + SyntacticTypeDefinition.

+ +

Whilst the DwC-A format is embedded in many software platforms, including web based tools, + none of these seem to fit the needs of the citizen scientist. They want to use existing + javascript libraries such as Leaflet, an open-Source + javascript library for interactive maps, where possible to simplify their web development + effort.

+ +

Leaflet has good support for GeoJSON, a JSON format + for encoding a variety of geographic data structures.

+ +

In the absence of standard tooling, the citizen scientist needs to write a custom parser + to convert the tab delimited data into GeoJSON. An example GeoJSON object resulting from + this transformation is provided below.

+ +
Example 35
{
+    "type": "Feature",
+    "id": "OBSNEV:SINFONEVADA:SINFON-100-005717-20040930",
+    "properties": {
+        "modified": "2013-06-20T11:18:18",
+        "institutionCode": "OBSNEV",
+        "collectionCode": "SINFONEVADA",
+        "basisOfRecord": "HumanObservation",
+        "catalogNumber": "SINFON-100-005717-20040930",
+        "eventDate": "2004-09-30 & 2004-09-30",
+        "fieldNumber": "",
+        "continent": "Europe",
+        "countryCode": "ESP",
+        "stateProvince": "GR",
+        "county": "ALDEIRE",
+        "locality": "",
+        "minimumElevationInMeters": "1992",
+        "maximumElevationInMeters": "1992",
+        "coordinateUncertaintyInMeters": "1",
+        "scientificName": "Pinus sylvestris Lour.",
+        "kingdom": "Plantae",
+        "phylum": "Pinophyta",
+        "class": "Pinopsida",
+        "order": "Pinales",
+        "family": "Pinaceae",
+        "genus": "Pinus",
+        "specificEpithet": "sylvestris",
+        "infraspecificEpithet": "",
+        "scientificNameAuthorship": "Lour."
+    },
+    "geometry": {
+        "type": "Point",
+        "coordinates": [-3.116135071, 37.12724018, 1992]
+    }
+}
+ +
Note

+ GeoJSON coordinates are specified in order of longitude, latitude and, optionally, altitude. +

+ +

Requires: + CsvToJsonTransformation.

+ +

The citizen scientist notes that many of the terms in a given row are drawn from controlled + vocabularies; geographic names and taxonomies. For the application, they want to be able to + refer to the authoritative definitions for these controlled vocabularies, say, to provide + easy access for users of the application to the definitions of scientific terms such as "Pinophyta".

+ +

Requires: + AssociationOfCodeValuesWithExternalDefinitions.

+ +

Thinking to the future of their application, our citizen scientist anticipates the need + to aggregate data across multiple datasets; each of which might use different column headings + depending on who compiled the tabular dataset. Furthermore, how can one be sure they are + comparing things of equivalent type?

+ +

To remedy this, they want to use the definitions from the metadata file + meta.xml. The easiest approach to achieve this is to modify their parser + to export [json-ld] and transform the tabular data into RDF that can be easily + reconciled.

+ +

The resultant "GeoJSON-LD" takes the form (edited for brevity):

+ +
Example 36
{
+    "@context": {
+        "base": "http://www.gbif.org/dataset/db6cd9d7-7be5-4cd0-8b3c-fb6dd7446472/",
+        "Feature": "http://example.com/vocab#Feature",
+        "Point": "http://example.com/vocab#Point",
+        "modified": "http://purl.org/dc/terms/modified",
+        "institutionCode": "http://rs.tdwg.org/dwc/terms/institutionCode",
+        "collectionCode": "http://rs.tdwg.org/dwc/terms/collectionCode",
+        "basisOfRecord": "http://rs.tdwg.org/dwc/terms/basisOfRecord",
+{snip}
+    },
+    "type": "Feature",
+    "@type": "http://rs.tdwg.org/dwc/terms/Occurrence",
+    "id": "OBSNEV:SINFONEVADA:SINFON-100-005717-20040930",
+    "@id": "base:OBSNEV:SINFONEVADA:SINFON-100-005717-20040930",
+    "properties": {
+        "modified": "2013-06-20T11:18:18",
+        "institutionCode": "OBSNEV",
+        "collectionCode": "SINFONEVADA",
+        "basisOfRecord": "HumanObservation",
+{snip}
+    },
+    "geometry": {
+        "type": "Point",
+        "coordinates": [-3.116135071, 37.12724018, 1992]
+    }
+}
+ +

The complete JSON object may be retrieved here.

+ +

The unique identifier for each "occurrence" record has been mapped to + a URI by appending the local identifier (from column id) + to the URI of the dataset within which the record occurs.

+ +

Requires: + URIMapping, + SemanticTypeDefinition and + CsvToRdfTransformation.

+ +
Note
+

The @type of the entity is taken from the rowType attribute + within the metadata file.

+
+ +
Note
+

The amendment of the GeoJSON specification to include JSON-LD is a work in progress at the time + of writing. Details can be found on the GeoJSON GitHub.

+
+ +
Note
+

It is the hope of the DwC-A format specification authors that the availability + of general metadata vocabulary for describing CSV files, or indeed any tabular text + datasets, will mean that DwC-A can be deprecated. This would allow the biodiversity + community, and initiatives such as GBIF, to spend their efforts developing tools that + support the generic standard rather than their own domain specific conventions and + specifications, thus increasing the accessibility of biodiversity data.

+ +

To achieve this goal, it is essential that the key characteristics of the DwC-A format + can be adequately described, thus enabling the general metadata vocabulary to be adopted + without needing to modify the existing DwC-A encoded data holdings.

+
+ +
+ +
+

2.22 Use Case #22 - Making sense of other people's data

+

+ (Contributed by Steve Peters via Phil Archer with input from Ian Makgill) +

+

spendnetwork.com harvests spending data from multiple UK local and central government CSV files. + It adds new metadata and annotations to the data and cross-links suppliers to OpenCorporates and, + elsewhere, is beginning to map transaction types to different categories of spending.

+

For example, East Sussex County Council publishes its + spending data as Excel spreadsheets.

+

A snippet of data from East Sussex County Council indicating payments over £500 for the second financial quarter of 2011 is below to illustrate. + White space has been added for clarity. The full data file for that period (saved in CSV format from Microsoft Excel 2007) is provided here: + ESCC-payment-data-Q2281011.csv

+
Example 37
Transparency Q2 - 01.07.11 to 30.09.11 as at 28.10.11,,,,,
+                         Name,          Payment category,   Amount,                        Department,Document no.,Post code
+{snip}
+               MARTELLO TAXIS,   Education HTS Transport,     £620,"Economy, Transport & Environment",  7000785623,     BN25
+               MARTELLO TAXIS,   Education HTS Transport, "£1,425","Economy, Transport & Environment",  7000785624,     BN25
+MCL TRANSPORT CONSULTANTS LTD,        Passenger Services, "£7,134","Economy, Transport & Environment",  4500528162,     BN25
+MCL TRANSPORT CONSULTANTS LTD,Concessionary Fares Scheme,"£10,476","Economy, Transport & Environment",  4500529102,     BN25
+{snip}
+

This data is augmented by spendnetwork.com and presented in a Web page. + The web page for East Sussex County Council is illustrated in Fig. 5 Payments over £500 for East Sussex County Council July-Sept 2011, illustrated by spendnetwork

+
+ spendnetwork1.png +
Fig. 5 Payments over £500 for East Sussex County Council July-Sept 2011, illustrated by spendnetwork
+
+

Notice the Linked Data column that links to + OpenCorporates data on MCL Transport Consultants Ltd. + If we follow the 'more' link we see many more cells that spendnetwork would like to include (see + Fig. 6 Payment transaction details, illustrated by spendnetwork). Where data is available + from the original spreadsheet it has been included.

+
+ spendnetwork2.png +
Fig. 6 Payment transaction details, illustrated by spendnetwork
+
+

The schema here is defined by a third party (spendnetwork.com) to make sense of the original data within their own model + (only some of which is shown here, spendnetwork.com also tries to categorize transactions and more). This model exists independently of + multiple source datasets and entails a mechanism for reusers to link to the original data from the metadata. + Published metadata can be seen variously as feedback, advertising, enrichment or annotations. Such information could help the publisher to + improve the quality of the original source, however, for the community at large it reduces the need for repetition of the work done to make + sense of the data and facilitates a network effect. It may also be the case that + the metadata creator is better able to put the original data into a wider context with more accuracy and commitment than the original publisher.

+

Another (similar) scenario is LG-Inform. This harvests government statistics from multiple sources, + many in CSV format, and calculates rates, percentages & trends etc. and packages them as a set of performance metrics/measures. Again, it + would be very useful for the original publisher to know, through metadata, that their source has been defined and used (potentially alongside + someone else's data) in this way.

+

See http://standards.esd.org.uk/ and the "Metrics" tab therein; e.g. + percentage of measured children in reception year classified as obese (3333).

+

The analysis of datasets undertaken by both spendnetwork.com and LG-Inform to make sense of other people's tabular data is time-consuming + work. Making that metadata available is a potential help to the original data publisher as well as other would-be reusers of it.

+

+ Requires: + WellFormedCsvCheck, + IndependentMetadataPublication, + ZeroEditAdditionOfSupplementaryMetadata, + AnnotationAndSupplementaryInfo, + AssociationOfCodeValuesWithExternalDefinitions, + SemanticTypeDefinition, + URIMapping and + LinkFromMetadataToData. +

+
+ +
+

2.23 Use Case #23 - Collating humanitarian information for crisis response

+

+ (Contributed by Tim Davies) +

+ +

During a crisis response, information managers within the humanitarian community face + a significant challenge in trying to collate data regarding humanitarian needs and response + activities conducted by a large number of humanitarian actors. The schemas for these data + sets are generally not standardized across different actors nor are the mechanisms for + sharing the data. In the best case, this results in a significant delay between the collection + of data and the formulation of that data into a common operational picture. In the worst case, + information is simply not shared at all, leaving gaps in the understanding of the field situation. +

+ +

The Humanitarian eXchange Language + (HXL) project seeks to address this concern; enabling information from diverse parties to be + collated into a single "Humanitarian Data Registry". Supporting tools are provided to assist + participants in a given response initiative in finding information within this registry to + meet their needs.

+ +

The HXL standard is designed to be a common publishing format for humanitarian data. + A key design principle of the HXL project is that the data publishers are able to continue + publication of their data using their existing systems. Unsurprisingly, data publishers + often provide their data in tabular formats such as CSV, having exported the content from + spreadsheet applications. As a result, the HXL standard is entirely based on tabular data.

+ +

During their engagement with the humanitarian response community, the HXL project team have identified two major concerns when working with tabular data:

+
    +
  • Tabular data needs to be created, read by and exchanged between people speaking different languages. Many of these are basic spreadsheet users who find it far easier to use data with natural and clear language in the column headings. Having the column headings in their own language makes creating and interpreting the data a lot easier.
  • +
  • Tabular data needs to be created that contains literal values in multiple languages. For example, the name of a town in English, French and Arabic. The total number of languages that the data might be expressed in cannot be easily determined in advance, and it should be possible for a data manager to introduce a new language variant of a column easily.
  • +
+ +

To address these issues, the HXL project have developed a number of + conventions + for publishing tabular data in CSV format.

+ +

Column headings in the tabular data are supplemented with short hashtags that are defined in the HXL hashtag dictionary. The hashtag provides the normative meaning of the data in the column while the column header from the original data, a literal text string, is informative. This allows software systems to quickly ascertain the meaning of the data irrespective of the column heading and language used in the original data. For example, where a column provides information on the numbers of people affected by an emergency, the heading may be one of: "People affected", "Affected", "# de personnes concernées", "Afectadas/os" etc. The hashtag #affected is used to provide a common key to interpret the data.

+ +
Example 38
. Cluster,     District,  People affected,   People reached
+  #sector,        #adm1,        #affected,         #reached
+     WASH,        Coast,             9000,             9000
+     WASH,    Mountains,             1000,              200
+Education,        Coast,            15500,             8000
+Education,    Mountains,              750,              600
+   Health,        Coast,            20000,             3500
+   Health,    Mountains,             3500,             1500
+ +

(whitespace included for clarity)

+ +

Requires: + MultipleHeadingRows and + SemanticTypeDefinition.

+ +

Hashtags may be supplemented with attributes to refine the meaning of the data. A suggested set of attributes is provided in the HXL hashtag dictionary. For example, attributes may be used to specify the language used for the text in a given column using "+" followed by an ISO 639 language code:

+ +
Example 39
.    Project title,             Titre du projet
+      #activity+en,                #activity+fr
+Malaria treatments,     Traitement du paludisme
+  Teacher training,Formation des enseignant(e)s
+ +

(whitespace included for clarity)

+ +

Requires: + MultilingualContent.

+ +

Where multiple data-values for a given field code are provided in a single + row, the field code is repeated - as illustrated in the example below that provides + geocodes for multiple locations pertaining to the subject of the record.

+ +
Example 40
P-code  1,P-code  2,P-code  3
+#loc+code,#loc+code,#loc+code
+   020503,         ,
+   060107,   060108,
+   173219,         ,
+   530012,   530013,   530015
+   279333,         ,
+ +

(whitespace included for clarity)

+ +

Requires: + RepeatedProperties.

+ +

In the example above, we see an often repeated pattern where data includes codes to reference some authoritative + term, definition or other resource; e.g. the location code 020503. In order + to make sense of the data, these codes must be reconciled with their official definitions.

+ +

Requires: + AssociationOfCodeValuesWithExternalDefinitions.

+ +

A snippet of an example of a tabular HXL data file is provided below. A local copy + of the HXL data file is also available: + HXL_3W_samples_draft_Multilingual.csv.

+ +
Example 41
Fecha del informe,      Fuente,     Implementador,Código de sector,       Sector / grupo,   Sector / group,    Subsector,     País,Código de provincia, Province,    Region,Código del municipio,Municipality
+   #date+reported,#meta+source,              #org,    #sector+code,           #sector+es,       #sector+en,#subsector+en, #country,         #adm1+code, #adm1+en,#region+en,          #adm2+code,    #adm2+en
+       2013-11-19,Mapaction OP,      World VISION,             S01,Refugio de emergencia,Emergency Shelter,             ,Filipinas,           60400000,    Aklan,        VI,                    ,
+       2013-11-19,   DHNetwork,DFID Medical Teams,             S02,                Salud,           Health,             ,         ,           60400000,    Aklan,        VI,                    ,
+       2013-11-19,   DHNetwork,               MSF,             S02,                Salud,           Health,             ,         ,           60400000,    Aklan,        VI,                    ,
+       2013-11-19,  Cluster 3W,     LDS Charities,             S03,                 WASH,             WASH,      Hygiene,Filipinas,           60400000,    Aklan,        VI,                    ,
+{snip}
+ +

(whitespace included for clarity)

+ +
+
+

2.24 Use Case #24 - Expressing a hierarchy within occupational listings

+

+ (Contributed by Dan Brickley) +

+ +

Our user intends to analyze the current state of the job market using information gleaned + from job postings that are published using schema.org markup.

+ +
Note

schema.org defines a schema for a listing that describes a + job opening within an organization: JobPosting.

+ +

One of the things our user wants to do is to organise the job postings into categories + based on the occupationalCategory + property of each JobPosting.

+ +

The occupationalCategory property is used to categorize the described job. The + O*NET-SOC Taxonomy is schema.org's recommended + controlled vocabulary for the occupational categories.

+ +

The schema.org documentation notes that the value of the occupationalCategory property + should include both the textual label and the formal code from the O*NET-SOC Taxonomy, as + illustrated below in the following RDFa snippet:

+ +
Example 42
<br><strong>Occupational Category:</strong> <span property="occupationalCategory">15-1199.03 Web Administrators</span>
+ +

The O*NET-SOC Taxonomy is republished every few years; the + occupational listing for 2010 + is the most recent version available. This + listing is also available in CSV format. + An extract from this file is provided below. A local copy of this CSV file is also available: + file = 2010_Occupations.csv.

+ +
Example 43
O*NET-SOC 2010 Code,O*NET-SOC 2010 Title,O*NET-SOC 2010 Description
+{snip}
+15-1199.00,"Computer Occupations, All Other",All computer occupations not listed separately.
+15-1199.01,Software Quality Assurance Engineers and Testers,Develop and execute software test plans in order to identify software problems and their causes.
+15-1199.02,Computer Systems Engineers/Architects,"Design and develop solutions to complex applications problems, system administration issues, or network concerns. Perform systems management and integration functions."
+15-1199.03,Web Administrators,"Manage web environment design, deployment, development and maintenance activities. Perform testing and quality assurance of web sites and web applications."
+15-1199.04,Geospatial Information Scientists and Technologists,"Research or develop geospatial technologies. May produce databases, perform applications programming, or coordinate projects. May specialize in areas such as agriculture, mining, health care, retail trade, urban planning, or military intelligence."
+15-1199.05,Geographic Information Systems Technicians,"Assist scientists, technologists, or related professionals in building, maintaining, modifying, or using geographic information systems (GIS) databases. May also perform some custom application development or provide user support."
+15-1199.06,Database Architects,"Design strategies for enterprise database systems and set standards for operations, programming, and security. Design and construct large relational databases. Integrate new systems with existing warehouse structure and refine system performance and functionality."
+15-1199.07,Data Warehousing Specialists,"Design, model, or implement corporate data warehousing activities. Program and configure warehouses of database information and provide support to warehouse users."
+15-1199.08,Business Intelligence Analysts,Produce financial and market intelligence by querying data repositories and generating periodic reports. Devise methods for identifying data patterns and trends in available information sources.
+15-1199.09,Information Technology Project Managers,"Plan, initiate, and manage information technology (IT) projects. Lead and guide the work of technical staff. Serve as liaison between business and technical aspects of projects. Plan project stages and assess business implications for each stage. Monitor progress to assure deadlines, standards, and cost targets are met."
+15-1199.10,Search Marketing Strategists,"Employ search marketing tactics to increase visibility and engagement with content, products, or services in Internet-enabled devices or interfaces. Examine search query behaviors on general or specialty search engines or other Internet-based content. Analyze research, data, or technology to understand user intent and measure outcomes for ongoing optimization."
+15-1199.11,Video Game Designers,"Design core features of video games. Specify innovative game and role-play mechanics, story lines, and character biographies. Create and maintain design documentation. Guide and collaborate with production staff to produce games as designed."
+15-1199.12,Document Management Specialists,"Implement and administer enterprise-wide document management systems and related procedures that allow organizations to capture, store, retrieve, share, and destroy electronic records and documents."
+{snip}
+ +

The CSV file follows the specification outlined in [RFC4180] - including the use of + pairs of double quotes ("") to escape cells that themselves contain commas.

+ +

Also note that each row provides a unique identifier for the occupation it describes. This + unique identifier is given in the O*NET-SOC 2010 Code column. This code can be considered + as the primary key for each row in the listing as it is unique for every row. Furthermore, the value + of the O*NET-SOC 2010 Code column serves as the unique identifier for the occupation.

+ +

Requires: + PrimaryKey.

+ +

Closer inspection of the O*NET-SOC 2010 code illustrates the hierarchical classification + within the taxonomy. The first six digits are based on the + Standard Occupational Classification (SOC) + code from the US Bureau of Labor Statistics, with further subcategorization thereafter where necessary. + The first and second digits represent the major group; the third digit represents the minor group; + the fourth and fifth digits represent the broad occupation; and the sixth digit represents the + detailed occupation.

+ +

The SOC structure (2010) is available in Microsoft Excel 97-2003 Workbook format. + An extract of this structure, in CSV format (exported from Microsoft Excel 2007), is provided below. + A local copy of the SOC structure in CSV is also available: file = soc_structure_2010.csv.

+ +
Example 44
Bureau of Labor Statistics,,,,,,,,,
+On behalf of the Standard Occupational Classification Policy Committee (SOCPC),,,,,,,,,
+,,,,,,,,,
+January 2009,,,,,,,,,
+*** This is the final structure for the 2010 SOC.   Questions should be emailed to soc@bls.gov***,,,,,,,,,
+,,,,,,,,,
+,,,,,,,,,
+,,,,,,,,,
+,,,,,,,,,
+,2010 Standard Occupational Classification,,,,,,,,
+,,,,,,,,,
+Major Group,Minor Group,Broad Group,Detailed Occupation,,,,,,
+,,,,,,,,,
+11-0000,,,,Management Occupations,,,,,
+,11-1000,,,Top Executives,,,,,
+,,11-1010,,Chief Executives,,,,,
+,,,11-1011,Chief Executives,,,,,
+{snip}
+,,,13-2099,"Financial Specialists, All Other",,,,,
+15-0000,,,,Computer and Mathematical Occupations,,,,,
+,15-1100,,,Computer Occupations,,,,,
+,,15-1110,,Computer and Information Research Scientists,,,,,
+,,,15-1111,Computer and Information Research Scientists,,,,,
+,,15-1120,,Computer and Information Analysts,,,,,
+,,,15-1121,Computer Systems Analysts,,,,,
+,,,15-1122,Information Security Analysts,,,,,
+,,15-1130,,Software Developers and Programmers,,,,,
+,,,15-1131,Computer Programmers,,,,,
+,,,15-1132,"Software Developers, Applications",,,,,
+,,,15-1133,"Software Developers, Systems Software",,,,,
+,,,15-1134,Web Developers,,,,,
+,,15-1140,,Database and Systems Administrators and Network Architects,,,,,
+,,,15-1141,Database Administrators,,,,,
+,,,15-1142,Network and Computer Systems Administrators,,,,,
+,,,15-1143,Computer Network Architects,,,,,
+,,15-1150,,Computer Support Specialists,,,,,
+,,,15-1151,Computer User Support Specialists,,,,,
+,,,15-1152,Computer Network Support Specialists,,,,,
+,,15-1190,,Miscellaneous Computer Occupations,,,,,
+,,,15-1199,"Computer Occupations, All Other",,,,,
+,15-2000,,,Mathematical Science Occupations,,,,,
+{snip}
+ +

The header line here comes below an empty row and is separated from the data by another empty row. + There is metadata about the table in the rows above the header line.

+ +

Requires: + MultipleHeadingRows and + AnnotationAndSupplementaryInfo.

+ +

Being familiar with SKOS, our user decides + to map both the O*NET-SOC and SOC taxonomies into a single hierarchy expressed using RDF/OWL and the + SKOS vocabulary.

+ +

Note that in order to express the two taxonomies in SKOS, the local identifiers used in + the CSV files (e.g. 15-1199.03) must be mapped to URIs.

+ +

Requires: + URIMapping.

+ +

Each of the five levels used across the occupation classification schemes is assigned to a particular OWL class - each of which is a sub-class of skos:Concept:

+
    +
  • From SOC - +
      +
    • Major Group: ex:SOC-MajorGroup
    • +
    • Minor Group: ex:SOC-MinorGroup
    • +
    • Broad Group: ex:SOC-BroadGroup
    • +
    • Detailed Occupation: ex:SOC-DetailedOccupation
    • +
    +
  • +
  • From O*NET-SOC - +
      +
    • ex:ONETSOC-Occupation
    • +
    +
  • +
+ +

The SOC taxonomy contains four different types of entities, and so requires several different + passes to extract each of those from the CSV file. Depending on which kind of entity is being + extracted, a different column provides the unique identifier for the entity. Data in a given + row is only processed if the value for the cell designated as the unique identifier is not blank. + For example, if the Detailed Occupation column is designated as providing the + unique identifier (e.g. to extract entities of type ex:SOC-DetailedOccupation), + then the only rows to be processed in the snippet below would be "Financial Specialists, All Other", + "Computer and Information Research Scientists" and "Computer Occupations, All Other". All other rows + would be ignored.

+ +
Example 45
{snip}
+Major Group,Minor Group,Broad Group,Detailed Occupation,                                            ,,,,,
+           ,           ,           ,                   ,                                            ,,,,,
+{snip}
+           ,           ,           ,            13-2099,          "Financial Specialists, All Other",,,,,
+    15-0000,           ,           ,                   ,       Computer and Mathematical Occupations,,,,,
+           ,    15-1100,           ,                   ,                        Computer Occupations,,,,,
+           ,           ,    15-1110,                   ,Computer and Information Research Scientists,,,,,
+           ,           ,           ,            15-1111,Computer and Information Research Scientists,,,,,
+{snip}
+           ,           ,    15-1190,                   ,          Miscellaneous Computer Occupations,,,,,
+           ,           ,           ,            15-1199,           "Computer Occupations, All Other",,,,,
+           ,    15-2000,           ,                   ,            Mathematical Science Occupations,,,,,
+{snip}
+ +

(whitespace added for clarity)

+ +

Requires: + ConditionalProcessingBasedOnCellValues.

+ +

The hierarchy in the SOC structure is implied by inheritance from + the preceding row(s). For example, the row describing SOC minor group "Computer Occupations" + (Minor Group = 15-1100, above) has an empty cell value for column Major Group. + The value for SOC major group is provided by the preceding row. In the case of SOC detailed + occupation "Computer Occupations, All Other" (Detailed Occupation = 15-1199), + the value for column Major Group is provided 20 lines previously when a value + in that column was most recently provided. The example snippet below illustrates what the CSV would + look like if the inherited cell values were present:

+ + +
Example 46
{snip}
+Major Group,Minor Group,Broad Group,Detailed Occupation,                                            ,,,,,
+           ,           ,           ,                   ,                                            ,,,,,
+{snip}
+    13-0000,    13-2000,    13-2090,            13-2099,          "Financial Specialists, All Other",,,,,
+    15-0000,           ,           ,                   ,       Computer and Mathematical Occupations,,,,,
+    15-0000,    15-1100,           ,                   ,                        Computer Occupations,,,,,
+    15-0000,    15-1100,    15-1110,                   ,Computer and Information Research Scientists,,,,,
+    15-0000,    15-1100,    15-1110,            15-1111,Computer and Information Research Scientists,,,,,
+{snip}
+    15-0000,    15-1100,    15-1190,                   ,          Miscellaneous Computer Occupations,,,,,
+    15-0000,    15-1100,    15-1190,            15-1199,           "Computer Occupations, All Other",,,,,
+    15-0000,    15-2000,           ,                   ,            Mathematical Science Occupations,,,,,
+{snip}
+ +

(whitespace added for clarity)

+ +

It is difficult to programmatically describe how the inherited values should be implemented. + It is not as simple as inferring the value for a blank cell from the most recent preceding row + when a non-blank value was provided for that column. For example, the last row in the example + above describing "Mathematical Science Occupations" does not inherit the values from columns + Broad Group and Detailed Occupation in the preceding row because + it describes a new level in the hierarchy.

+ +

However, given that the SOC code is a string value with regular structure that reflects + the position of a given concept within the hierarchy, it is possible to determine the + identifier of each of the broader concepts by parsing the identifier string. For example, + the regular expression /^(\d{2})-(\d{2})(\d)\d$/ could be used to split the + identifier for a detailed occupation code into its constituent parts from which the + identifiers for the associated broader concepts could be constructed.

+ +

Requires: + CellMicrosyntax.

+ +

The same kind of processing applies to the O*NET-SOC taxonomy; in this case also extracting + a description for the occupation. There is also an additional complication: where an + O*NET-SOC code ends in ".00", that occupation is a direct mapping to the + occupation defined in the SOC taxonomy. For example, the O*NET-SOC code 15-1199.00 + refers to the same occupation category as the SOC code 15-1199: + "Computer Occupations, All Other".

+ +

To implement this complication, we need to use conditional processing.

+

If the final two digits of the O*NET-SOC code are "00", then:

+
    +
  • the entity is of type ex:SOC-DetailedOccupation;
  • +
  • the unique identifier and notation for the concept comprises only the six numerical digits of the O*NET-SOC 2010 Code cell value (e.g. in the form nn-nnnn); and
  • +
  • no skos:broader relationship need be defined.
  • +
+ +

else:

+
    +
  • the entity is of type ex:ONETSOC-Occupation;
  • +
  • the unique identifier and notation for the concept comprises the eight numerical digits of the O*NET-SOC 2010 Code cell value (e.g. in the form nn-nnnn.nn); and
  • +
  • a skos:broader relationship is defined with the broader concept from the SOC taxonomy identified by the first six numerical digits of the O*NET-SOC 2010 Code cell value.
  • +
+ +

The example below illustrates the conditional behaviour:

+ +
Example 47
row:
+----
+
+15-1199.00,"Computer Occupations, All Other",All computer occupations not listed separately.
+
+resulting RDF (in Turtle syntax):
+---------------------------------
+
+ex:15-1199 a ex:SOC-DetailedOccupation ;
+    skos:notation "15-1199" ;
+    skos:prefLabel "Computer Occupations, All Other" ;
+    dct:description "All computer occupations not listed separately." .
+
+row:
+----
+
+15-1199.03,Web Administrators,"Manage web environment design, deployment, development and maintenance activities. Perform testing and quality assurance of web sites and web applications."
+
+resulting RDF (in Turtle syntax):
+---------------------------------
+
+ex:15-1199.03 a ex:ONETSOC-Occupation ;
+    skos:notation "15-1199.03" ;
+    skos:prefLabel "Web Administrators" ;
+    dct:description "Manage web environment design, deployment, development and maintenance activities. Perform testing and quality assurance of web sites and web applications." ;
+    skos:broader ex:15-1199 .
+ +

Requires: + ConditionalProcessingBasedOnCellValues.

+ +

A snippet of the final SKOS concept scheme, expressed in RDF using Turtle [turtle] syntax, resulting + from transformation of the O*NET-SOC and SOC taxonomies into RDF is provided below. Ideally, all + duplicate triples will be removed - such as the skos:prefLabel + property for concept ex:15-1190 which would be provided by both the O*NET-SOC and SOC CSV files.

+ +
Example 48
ex:15-0000 a ex:SOC-MajorGroup ;
+    skos:notation "15-0000" ;
+    skos:prefLabel "Computer and Mathematical Occupations" .
+ex:15-1100 a ex:SOC-MinorGroup ;
+    skos:notation "15-1100" ;
+    skos:prefLabel "Computer Occupations" ;
+    skos:broader ex:15-0000 .
+ex:15-1190 a ex:SOC-BroadGroup ;
+    skos:notation "15-1190" ;
+    skos:prefLabel "Miscellaneous Computer Occupations" ;
+    skos:broader ex:15-0000, ex:15-1100 .
+ex:15-1199 a ex:SOC-DetailedOccupation ;
+    skos:notation "15-1199" ;
+    skos:prefLabel "Computer Occupations, All Other" ;
+    dct:description "All computer occupations not listed separately." ;
+    skos:broader ex:15-0000, ex:15-1100, ex:15-1190 .
+ex:15-1199.03 a ex:ONETSOC-Occupation ;
+    skos:notation "15-1199.03" ;
+    skos:prefLabel "Web Administrators" ;
+    dct:description "Manage web environment design, deployment, development and maintenance activities. Perform testing and quality assurance of web sites and web applications." ;
+    skos:broader ex:15-0000, ex:15-1100, ex:15-1190, ex:15-1199 .
+ +

Once the SKOS concept scheme has been defined, it is possible for our user to group + job postings by SOC Major Group, SOC Minor Group, SOC Broad Group, SOC Detailed Occupation + and O*NET-SOC Occupation to provide summary statistics about the job market.

+ +

For example, we can use the SKOS concept scheme to group job postings for "Web Administrators" (code 15-1199.03) as follows:

+
    +
  • 15-0000 "Computer and Mathematical Occupations" (SOC major group)
  • +
  • 15-1100 "Computer Occupations" (SOC minor group)
  • +
  • 15-1190 "Miscellaneous Computer Occupations" (SOC broad occupation)
  • +
  • 15-1199 "Computer Occupations, All Other" (SOC detailed occupation)
  • +
  • 15-1199.03 "Web Administrators"
  • +
+
+
+

2.25 Use Case #25 - Consistent publication of local authority data

+

Open data and transparency are foundational elements within the UK Government's approach to improve public service. The Local Government Association (LGA) promotes open and transparent local government to meet local needs and demands; to innovate and transform services leading to improvements and efficiencies, to drive local economic growth and to empower citizen and community groups to choose or run services and shape neighbourhoods.

+ +

As part of this initiative, the LGA is working to put local authority data into the public realm in ways that provide real benefits to citizens, business, councils and the wider data community. The LGA provides a web portal to help identify open data published by UK local authorities and encourage standardisation of local open data; enabling data consumers to browse through datasets published by local authorities across the UK and providing guidance and tools to data publishers to drive consistent practice in publication.

+ +

Data is typically published in CSV format.

+ +

An illustrative example is provided for data describing public toilets. The portal lists datasets of information about public toilets provided by more than 70 local authorities. In order to ensure consistent publication of data about public toilets the LGA provides both guidance documentation and a machine-readable schema against which datasets may be validated using on-line tools.

+ +

The public toilets CSV schema has 32 (mandated or optional) fields. The validator tool allows columns to appear in any order, matching the column order to the schema based on the title in the column header. Furthermore, CSV files containing additional columns, such as SecureDisposalofSharps specified within the public toilet dataset for Bath and North East Somerset (as shown below), are also considered valid. Additional columns are included where one or more local authorities have specific requirements to include additional information to satisfy local needs. Such additional columns are not supported using formal 'extensions' of the schema as the organisational and administrative burden of doing so was considered too great.

+ +
Example 49
ExtractDate,OrganisationURI,OrganisationLabel,ServiceTypeURI,ServiceTypeLabel,LocationText,StreetAddress,LocalityAddress,TownAddress,Postcode,GeoAreaWardURI,GeoAreaWardLabel,UPRN,CoordinateReferenceSystem,GeoX,GeoY,GeoPointLicensingURL,Category,AccessibleCategory,BabyChange,SecureDisposalofSharps,OpeningHours,ManagingBy,ChargeAmount,Notes
+15/09/2014,http://opendatacommunities.org/id/unitary-authority/bath-and-north-east-somerset,Bath and North East Somerset,http://id.esd.org.uk/service/579,Public Toilets,CHARLOTTE STREET ENTRANCE,CHARLOTTE STREET,KINGSMEAD,BATH,BA1 2NE,http://statistics.data.gov.uk/id/statistical-geography/E05001949,Kingsmead,10001147066,OSGB36,374661,165006,http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/,Female and male,Female and male,TRUE,TRUE,24 Hours ,BANES COUNCIL AND HEALTHMATIC,0.2,
+15/09/2014,http://opendatacommunities.org/id/unitary-authority/bath-and-north-east-somerset,Bath and North East Somerset,http://id.esd.org.uk/service/579,Public Toilets,ALICE PARK,GLOUCESTER ROAD,LAMBRIDGE,BATH,BA1 7BL,http://statistics.data.gov.uk/id/statistical-geography/E05001950,Lambridge,10001146447,OSGB36,376350,166593,http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/,Female and male,Female and male,TRUE,TRUE,06:00-21:00,BANES COUNCIL AND HEALTHMATIC,0.2,
+15/09/2014,http://opendatacommunities.org/id/unitary-authority/bath-and-north-east-somerset,Bath and North East Somerset,http://id.esd.org.uk/service/579,Public Toilets,HENRIETTA PARK,HENRIETTA ROAD,ABBEY,BATH,BA2 6LU,http://statistics.data.gov.uk/id/statistical-geography/E05001935,Abbey,10001147120,OSGB36,375338,165170,http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/,Female and male,Female and male,FALSE,Female and male,Winter & Su 10:00-16:00 | Other times: 08:00-18:00,BANES COUNCIL AND HEALTHMATIC,0,Scheduled for improvement Autumn 2014
+15/09/2014,http://opendatacommunities.org/id/unitary-authority/bath-and-north-east-somerset,Bath and North East Somerset,http://id.esd.org.uk/service/579,Public Toilets,SHAFTESBURY ROAD,SHAFTESBURY ROAD,OLDFIELD ,BATH,BA2 3LH,http://statistics.data.gov.uk/id/statistical-geography/E05001958,Oldfield,10001147060,OSGB36,373809,164268,http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/,Female and male,Female and male,TRUE,TRUE,24 Hours ,BANES COUNCIL AND HEALTHMATIC,0.2,
+{snip}
+ +

A local copy of this dataset is included for convenience.

+ +

Requires: + WellFormedCsvCheck, + CsvValidation and + SyntacticTypeDefinition. +

+
+
+ +
+

3. Requirements

+
+

3.1 Accepted requirements

+
+

3.1.1 CSV parsing requirements

+
+
R-NonStandardCellDelimiter
+
+ + Ability to parse tabular data with cell delimiters other than comma (,) + +

Tabular data is often provided with cell delimiters other than comma (,). + Fixed width formatting is also commonly used.

+

If a non-standard cell delimiter is used, it shall be possible to inform the CSV parser about the + cell delimiter or fixed-width formatting.

+

+ Motivation: + DisplayingLocationsOfCareHomesOnAMap, + SurfaceTemperatureDatabank, + SupportingSemantic-basedRecommendations, + PublicationOfBiodiversityInformation and + PlatformIntegrationUsingSTDF. +

+
Note
+

Standardizing the parsing of CSV is outside the chartered scope of the Working Group. However, [tabular-data-model] section 8. Parsing Tabular Data provides non-normative hints to creators of parsers to help them handle the wide variety of CSV-based formats that they may encounter due to the current lack of standardization of the format.

+

An annotated table may use the delimiter annotation, specified as part of a dialect description, to declare a string that is used to delimit cells in a given row. The default value is ",". See [tabular-metadata] section 5.9 Dialect Descriptions for further details.

+
+
+
R-CommentLines
+
+ + Ability to identify comment lines within a CSV file and skip over them during parsing, format conversion or other processing + +

A tabular datafile may include comment lines. It shall be possible to declare how to recognize + a comment line within the data (e.g. by specifying a sequence of characters that are found + at the beginning of every comment line).

+

Comment lines shall not be treated as data when parsing, converting or processing the + CSV file. During format conversion, the application may try to include the comment in the + conversion.

+

+ Motivation: + PlatformIntegrationUsingSTDF. +

+
Note
+

Standardizing the parsing of CSV is outside the chartered scope of the Working Group. However, [tabular-data-model] section 8. Parsing Tabular Data provides non-normative hints to creators of parsers to help them handle the wide variety of CSV-based formats that they may encounter due to the current lack of standardization of the format.

+

An annotated table may use the comment prefix annotation, specified as part of a dialect description, to declare a string that, when appearing at the beginning of a row, indicates that the row is a comment that should be associated as a rdfs:comment annotation to the table. The default value is "#". See [tabular-metadata] section 5.9 Dialect Descriptions for further details.

+
+
+
+
+
+

3.1.2 Applications requirements

+
+
R-CsvValidation
+
+ + Ability to validate a CSV for conformance with a specified metadata definition + +

The content of a CSV often needs to be validated for conformance against a + specification. A specification may be expressed in machine-readable format as defined in + the Metadata Vocabulary for Tabular Data [tabular-metadata].

+

Validation shall assess conformance against structural definitions such as number of + columns and the datatype for a given column. Further validation needs are to be + determined. It is anticipated that validation may vary based on row-specific attributes + such as the type of entity described in that row.

+

+ Dependency: + R-WellFormedCsvCheck +

+

+ Motivation: + DigitalPreservationOfGovernmentRecords, + OrganogramData, + ChemicalImaging, + ChemicalStructures, + DisplayingLocationsOfCareHomesOnAMap, + NetCdFcDl, + PaloAltoTreeData and + ConsistentPublicationOfLocalAuthorityData. +

+
Note
+

Validation of tabular data, as specified in [tabular-data-model] section 6.6 Validating Tables, includes the following aspects:

+
    +
  • assessing compatibility of the table with associated metadata - checking the correct number of non-virtual columns and matching names/titles for columns where these are specified in a header row;
  • +
  • ensuring uniqueness of primary keys;
  • +
  • checking that all foreign keys are valid; and
  • +
  • cell validation.
  • +
+

As described in [tabular-data-model] section 4.6 Datatypes, cell validation includes assessment of the literal content of the cell (e.g. length of string or number of bytes) and of the value inferred from parsing that literal content (e.g. formatting and numerical constraints).

+
+
+
R-RightToLeftCsvDeclaration
+
+ + Ability to determine that a CSV should be rendered using RTL column ordering and RTL text direction in cells. + +

It shall be possible to declare whether a given tabular data file should be rendered with column order direction Right-to-Left (RTL); e.g. the first column on the far right, with subsequent columns displayed to the left of the preceding column. It shall also be possible to declare that the content of cells in particular columns are rendered RTL.

+

A "RTL aware" application should use the RTL declaration to determine how to display a given data file. Automatic detection of appropriate rendering shall be the default behaviour (in absence of any such declaration).

+
Note
+

The directionality of the content does not affect the logical structure of the tabular data; i.e. the cell at index zero is followed by the cell at index 1, and then index 2 etc. As a result, parsing of RTL tabular data is anticipated to be identical to LTR content.

+
+

+ Motivation: + SupportingRightToLeftDirectionality. +

+
Note

+ It is possible to set the column direction using the tableDirection property and the text direction on columns using the textDirection property, as defined in [tabular-metadata]. +

+
+
R-CsvToRdfTransformation
+
+ + Ability to transform a CSV into RDF + +

Standardised CSV to RDF transformation mechanisms mitigate the need for bespoke + transformation software to be developed by CSV data consumers, thus simplifying the + exploitation of CSV data. Local identifiers for the entity described in a given row or + used to reference some other entity need to be converted to URIs. RDF properties + (or property paths) need to be determined to relate the entity described + within a given row to the corresponding data values for that row. Where available, + the type of a data value should be incorporated in the resulting RDF. Built-in types + defined in RDF 1.1 [rdf11-concepts] (e.g. + xsd:dateTime, + xsd:integer + etc.) and types defined in other RDF vocabularies / OWL ontologies (e.g. geo:wktLiteral, + GeoSPARQL [geosparql] section 8.5.1 RDFS Datatypes refers) shall be supported.

+

+ Dependency: + R-SemanticTypeDefinition, + R-SyntacticTypeDefinition and + R-URIMapping. +

+

+ Motivation: + DigitalPreservationOfGovernmentRecords, + OrganogramData, + PublicationOfPropertyTransactionData, + RepresentingEntitiesAndFactsExtractedFromText, + CanonicalMappingOfCSV, + PublicationOfBiodiversityInformation and + ExpressingHierarchyWithinOccupationalListings. +

+
Note
+

[csv2rdf] specifies the transformation of an annotated table to RDF; providing both minimal mode, where RDF output includes triples derived from the data within the annotated table, and standard mode, where RDF output additionally includes triples describing the structure of the annotated table.

+

Built-in datatypes are limited to those defined in [tabular-data-model] section 4.6 Datatypes. geo:wktLiteral and other datatypes from [geosparql] are not supported natively.

+
+
+
R-CsvToJsonTransformation
+
+ + Ability to transform a CSV into JSON + +

Standardised CSV to JSON transformation mechanisms mitigate the need for bespoke transformation software to be developed by CSV data consumers, thus simplifying the exploitation of CSV data.

+

+ Motivation: + DisplayingLocationsOfCareHomesOnAMap, + IntelligentlyPreviewingCSVFiles, + CanonicalMappingOfCSV and + PublicationOfBiodiversityInformation. +

+
Note
+

[csv2json] specifies the transformation of an annotated table to JSON; providing both minimal mode, where JSON output includes objects derived from the data within the annotated table, and standard mode, where JSON output additionally includes objects describing the structure of the annotated table. In both modes, the transformation provides 'prettyfication' of the JSON output where objects are nested rather than forming a flat list of objects with relations.

+

Built-in datatypes from the annotated table, as defined in [tabular-data-model] section 4.6 Datatypes, are mapped to JSON primitive types.

+
+
+
R-CanonicalMappingInLieuOfAnnotation
+
+ + Ability to transform CSV conforming to the core tabular data model yet lacking further + annotation into an object / object graph serialisation + +

A CSV conforming with the + core tabular data model [tabular-data-model], yet lacking + any annotation that defines rich semantics for that data, shall be able to be transformed into + an object / object graph serialisation such as JSON, XML or RDF using systematic rules - a "canonical" + mapping.

+

The canonical mapping should provide automatic scoping of local identifiers (e.g. conversion to + URI), identification of primary keys and detection of data types.

+

+ Motivation: + CanonicalMappingOfCSV. +

+
Note
+

An annotated table is always generated by applications implementing this specification when processing tabular data; albeit that without supplementary metadata, those annotations are limited (e.g. the titles annotation may be populated from the column headings provided within the tabular data file). Transformations to both RDF and JSON operate on the annotated table, therefore, a canonical transformation is achieved by transforming an annotated table that has not been informed by supplementary metadata.

+
+
+
R-IndependentMetadataPublication
+
+ + Ability to publish metadata independently from the tabular data resource it describes + +

Commonly, tabular datasets are published without the supplementary metadata that enables a third party to + correctly interpret the published information. An independent party - in addition to the data publisher - + shall be able to publish metadata about such a dataset, thus enabling a community of users to benefit from + the efforts of that third party to understand that dataset.

+

+ Dependency: + R-LinkFromMetadataToData and + R-ZeroEditAdditionOfSupplementaryMetadata. +

+

+ Motivation: + MakingSenseOfOtherPeoplesData and + PublicationOfBiodiversityInformation. +

+
Note
+

[tabular-metadata] specifies the format and structure of a metadata file that may be used to provide supplementary annotations on an annotated table or group of tables.

+
+
+
R-SpecificationOfPropertyValuePairForEachRow
+
+ + Ability to define a property-value pair for inclusion in each row + +

When annotating tabular data, it should be possible for one to define within the metadata + a property-value pair that is repeated for every row in the tabular dataset; for example, + the location ID for a set of weather observations, or the dataset ID for a set of + biodiversity observations.

+

In the case of sparsely populated data, this property-value pair must be applied as a + default only where that property is absent from the data.

+

As an illustration, the Darwin Core Archive standard + provides the ability to specify such a property value pair within its metadata description + file meta.xml.

+ +
Example 50
http://data.gbif.org/download/specimens.csv
+-------------------------------------------
+
+ID,Species,Count
+123,"Cryptantha gypsophila Reveal & C.R. Broome",12
+124,"Buxbaumia piperi",2
+
+meta.xml
+--------
+
+<archive xmlns="http://rs.tdwg.org/dwc/text/">
+  <core ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/xsd/simpledarwincore/SimpleDarwinRecord">
+    <files>
+      <location>http://data.gbif.org/download/specimens.csv</location>
+    </files>
+    <field index="0" term="http://rs.tdwg.org/dwc/terms/catalogNumber" />
+    <field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName" />
+    <field index="2" term="http://rs.tdwg.org/dwc/terms/individualCount" />
+    <field term="http://rs.tdwg.org/dwc/terms/datasetID" default="urn:lsid:tim.lsid.tdwg.org:collections:1"/>
+  </core>
+</archive>
+

Thus the original tabular data file specimens.csv is interpreted as:

+
Example 51
catalogNumber,scientificName,individualCount,datasetID
+123,"Cryptantha gypsophila Reveal & C.R. Broome",12,urn:lsid:tim.lsid.tdwg.org:collections:1
+124,"Buxbaumia piperi",2,urn:lsid:tim.lsid.tdwg.org:collections:1
+ +

+ Motivation: + PublicationOfBiodiversityInformation. +

+
Note
+

To meet this requirement a virtual column, as specified in [tabular-data-model], must be specified for the additional property-value pair that is to be included in each row. The default annotation may be used to specify a string value that is used for every empty cell in the associated column. Alternatively, the value URL annotation provides an absolute URL for a given cell. [tabular-metadata] specifies how a URI Template, specified in [RFC6570], may be used to specify the value URL using the valueURL property.

+
+
+
R-ZeroEditAdditionOfSupplementaryMetadata
+
+ + Ability to add supplementary metadata to an existing CSV file without + requiring modification of that file + +

It may not be possible for a tabular data file to be modified to include the supplementary + metadata required to adequately describe the content of the data file. For example, the + data may be published by a third party or the user may be constrained in their workflow + by choice of tools that do not support or even recognize the supplementary metadata.

+

It shall be possible to provide annotations about a given tabular data file without + requiring that file to be modified in any way; "zero-edit" addition.

+

+ Dependency: + R-LinkFromMetadataToData. +

+

+ Motivation: + PublicationOfNationalStatistics, + SurfaceTemperatureDatabank, + MakingSenseOfOtherPeoplesData and + PublicationOfBiodiversityInformation. +

+
Note
+

Please refer to R-CanonicalMappingInLieuOfAnnotation for details of the requirement to transform tabular data lacking any supplementary metadata.

+
+
Note
+

[tabular-metadata] specifies the format and structure of a metadata file that may be used to provide supplementary annotations on an annotated table or group of tables. Through use of such a metadata file, one may provide supplementary annotations without needing to edit the source tabular data file. Applications may use alternative mechanisms to gather annotations on an annotated table or group of tables.

+
+
+
R-LinkFromMetadataToData
+
+ + Ability for a metadata description to explicitly cite the tabular dataset it describes + +

Metadata resources may be published independently from the tabular dataset(s) they describe; e.g. a third + party may publish metadata in their own domain that describes how they have interpreted the data for their + application or community. In such a case, the relationship between the metadata and data resources cannot + be inferred - it must be stated explicitly.

+

Such a link between metadata and data resources should be discoverable, thus enabling a data publisher to + determine who is referring to their data leading to the data publisher gaining a better understanding + of their user community.

+

+ Motivation: + MakingSenseOfOtherPeoplesData and + PublicationOfBiodiversityInformation. +

+
Note
+

In addition to providing mechanisms to locate metadata relating to a tabular data file (see [tabular-data-model] section 5. Locating Metadata), the url annotation is used to define the URL of the source data for an annotated table; for example, referring to a specific CSV file.

+
+
+
+
+
+

3.1.3 Data model requirements

+
+
R-PrimaryKey
+
+ + Ability to determine the primary key for rows within a tabular data file + +

It shall be possible to uniquely identify every row within a tabular data file. The + default behaviour for uniquely identifying rows is to use the row number. However, some + datasets already include a unique identifier for each row in the dataset. In such + cases, it shall be possible to declare which column provides the primary key.

+

+ Motivation: + DigitalPreservationOfGovernmentRecords, + OrganogramData, + ChemicalImaging, + PaloAltoTreeData and + ExpressingHierarchyWithinOccupationalListings. +

+
Note
+

The primary key annotation, as specified in [tabular-data-model], may be used to define a primary key. Primary keys may be compiled from multiple values in a given row.

+
+
+
R-ForeignKeyReferences
+
+ + Ability to cross reference between CSV files + +

To interpret data in a given row of a CSV file, one may need to be able to refer to + information provided in supplementary CSV files or elsewhere within the same CSV file; + e.g. using a foreign key type reference. The cross-referenced CSV files may, or may + not, be packaged together.

+

+ Motivation: + DigitalPreservationOfGovernmentRecords, + OrganogramData, + SurfaceTemperatureDatabank, + RepresentingEntitiesAndFactsExtractedFromText and + SupportingSemantic-basedRecommendations. +

+
Note
+

The foreign keys annotation, as specified in [tabular-data-model], may be used to provide a list of foreign keys for an annotated table. To successfully validate, any cell value in a column referenced by the foreign key statement must have a unique value in the column of the referenced annotated table.

+

As an alternative to the strong validation provided by foreign keys, references or links between rows may be asserted. The target must be identified by URI as is defined using the value URL annotation, as specified in [tabular-data-model]. Where the target is defined in another annotated table, the identity of the subject (or subjects) which the row in that table describes is defined using the about URL annotation for the cells in the target row.

+
+
+
R-AnnotationAndSupplementaryInfo
+
+ + Ability to add annotation and supplementary information to CSV file + +

Annotations and supplementary information may be associated with:

+
    +
  • a group of tables
  • +
  • an entire table
  • +
  • a row
  • +
  • a column
  • +
  • an individual cell
  • +
  • range (or region) of cells within a table
  • +
+

Annotations and supplementary information may be literal values or references to a + remote resource. The presence of annotations or supplementary information must not + adversely impact parsing of the tabular data (e.g. the annotations and supplementary + information must be logically separate).

+ +
Note
+

This requirement refers to provision of human-readable annotation providing additional context to a group of tables, table, column, row, cell or other region within a table. For example, the publication of national statistics use case adds the following annotations to a table:

+
    +
  • title: Economic activity
  • +
  • dimensions: Economic activity (T016A), 2011 Administrative Hierarchy, 2011 Westminster Parliamentary Constituency Hierarchy
  • +
  • dataset population: All usual residents aged 16 to 74
  • +
  • coverage: England and Wales
  • +
  • area types (list omitted here for brevity)
  • +
  • textual description of dataset
  • +
  • publication information
  • +
  • contact details
  • +
+

This is disjoint from the requirements regarding the provision of supplementary + metadata to describe the content and structure of a tabular data file in a machine + readable form.

+
+ +

+ Motivation: + PublicationOfNationalStatistics, + SurfaceTemperatureDatabank, + PublicationOfPropertyTransactionData, + AnalyzingScientificSpreadsheets, + ReliabilityAnalyzesOfPoliceOpenData, + OpenSpendingData, + RepresentingEntitiesAndFactsExtractedFromText, + IntelligentlyPreviewingCSVFiles, + CanonicalMappingOfCSV, + SupportingSemantic-basedRecommendations, + MakingSenseOfOtherPeoplesData, + PublicationOfBiodiversityInformation, + ExpressingHierarchyWithinOccupationalListings and + PlatformIntegrationUsingSTDF. +

+
Note
+

Any annotation may be used in addition to the core annotations specified in [tabular-data-model], such as title, author, license etc. [tabular-metadata] section 5.8 Common Properties describes how such 'non-core' annotations are provided in a supplementary metadata file.

+

Any number of additional annotations may be provided for a group of tables or an annotated table; see table-group-notes and table-notes respectively.

+
+
Note

The Web Annotation Working Group is developing a vocabulary for expressing annotations. An example use of the table-notes annotation and the Web Annotation Working Group's open annotation vocabulary is provided in [csv2rdf].

+
+
R-AssociationOfCodeValuesWithExternalDefinitions
+
+ + Ability to associate a code value with externally managed definition + +

CSV files make frequent use of code values when describing data. Examples include: + geographic regions, status codes and category codes. In some cases, names are used + as a unique identifier for a resource (e.g. company name within a transaction audit). + It is difficult to interpret the + tabular data without an unambiguous definition of the code values or (local) identifiers used.

+

It must be possible to unambiguously associate the notation used within a CSV file + with the appropriate external definition.

+

+ Dependency: + R-URIMapping. +

+

+ Motivation: + PublicationOfNationalStatistics, + PublicationOfPropertyTransactionData, + SurfaceTemperatureDatabank, + OpenSpendingData, + RepresentingEntitiesAndFactsExtractedFromText, + IntelligentlyPreviewingCSVFiles, + SupportingSemantic-basedRecommendations, + MakingSenseOfOtherPeoplesData, + PublicationOfBiodiversityInformation and + CollatingHumanitarianResponseInformation. +

+
Note
+

Code values expressed within a cell can be associated with external definitions in two ways:

+
    +
  1. The valueURL property, as defined in [tabular-metadata], may be used to provide a URI Template that converts the code value to a URI, thus explicitly identifying the associated external definition. URI Templates are defined in [RFC6570].
  2. +
  3. The foreignKeys property, as defined in [tabular-metadata], may be used to provide a foreign key definition that relates the values in a column of the annotated table to those in a column of another annotated table. The definition of the code value could be provided in the table referenced via the foreign key.
  4. +
+
+
+
R-SyntacticTypeDefinition
+
+ + Ability to declare syntactic type for cells within a specified column. + +

Whilst it is possible to automatically detect the type of data (e.g. date, number) in a + given cell, this can be error prone. For example, the date April 1st if written + as 1/4 may be interpreted as a decimal fraction.

+

It shall be possible to declare the data type for the cells in a given column of a + tabular data file. Only one data type can be declared for a given column.

+
Note
+

An application may still attempt to automatically detect the data type for a given + cell. However, the explicit declaration shall always take precedence.

+
+
Note
+

The data type declaration will typically be used to declare that a column contains + integers, floating point numbers or text. However, it may be used to assert that a cell + contains, say, embedded + XML content (rdf:XMLLiteral), + datetime values (xsd:dateTime) or + geometry expressed as well-known-text (geo:wktLiteral, + GeoSPARQL [geosparql] section 8.5.1 RDFS Datatypes refers).

+
+

+ Motivation: + SurfaceTemperatureDatabank, + DigitalPreservationOfGovernmentRecords, + ReliabilityAnalyzesOfPoliceOpenData, + AnalyzingScientificSpreadsheets, + RepresentingEntitiesAndFactsExtractedFromText, + DisplayingLocationsOfCareHomesOnAMap, + IntelligentlyPreviewingCSVFiles, + CanonicalMappingOfCSV, + SupportingSemantic-basedRecommendations, + PublicationOfBiodiversityInformation, + PlatformIntegrationUsingSTDF and + ConsistentPublicationOfLocalAuthorityData. +

+
Note
+

The syntactic type for a cell value is defined using the datatype annotation. [tabular-data-model] section 4.6 Datatypes lists the built-in datatypes used in this specification; including those defined in [xmlschema11-2] plus number, binary, datetime, any, html, and json. Datatypes can be derived from the built-in datatypes using further annotations; [tabular-metadata] section 5.11.2 Derived Datatypes specifies how to describe derived datatypes within a metadata file.

+
+
+
R-SemanticTypeDefinition
+
+ + Ability to declare semantic type for cells within a specified column. + +

Each row in a tabular data set describes a given resource or entity. The properties + for that entity are described in the cells of that row. All the cells in a given column + are anticipated to provide the same property.

+

It shall be possible to declare the semantic relationship between the entity that a + given row describes and a cell in a given column.

+

The following example of an occupational listing + illustrates how a row of tabular data can be mapped to equivalent content expressed in RDF (Turtle).

+

The mappings are:

+ +
Example 52
CSV
+---
+
+O*NET-SOC 2010 Code,O*NET-SOC 2010 Title,O*NET-SOC 2010 Description
+         11-1011.00,    Chief Executives,"Determine and formulate policies and provide overall direction of companies [...]."
+{snip}
+
+RDF (Turtle)
+------------
+
+ex:11-1011.00
+    skos:notation "11-1011.00" ;
+    rdfs:label "Chief Executives" ;
+    dc:description "Determine and formulate policies and provide overall direction of companies [...]." .
+ +

A copy of the occupational listing CSV is available locally.

+ +
Note
+

To express semantics in a machine readable form, RDF seems the appropriate choice. + Furthermore, best practice indicates that one should adopt common and widely adopted + patterns (e.g. RDF vocabularies, OWL ontologies) when publishing data to enable a + wide audience to consume and understand the data. Existing (de facto) standard + patterns may add complexity when defining the semantics associated with a particular + row such that a single RDF predicate is insufficient.

+

For example, to express a quantity value using QUDT + we use an instance of qudt:QuantityValue to relate the numerical value + with the quantity kind (e.g. air temperature) and unit of measurement (e.g. + Celsius). Thus the semantics needed for a column containing temperature values might + be: qudt:value/qudt:numericValue – more akin to a LDPath.

+

Furthermore, use of OWL axioms when defining a sub-property of + qudt:value would allow the quantity type and unit of measurement to + be inferred, with the column semantics then being specified as + ex:temperature_Cel/qudt:numericValue.

+
+

+ Motivation: + DigitalPreservationOfGovernmentRecords, + PublicationOfNationalStatistics, + SurfaceTemperatureDatabank, + ReliabilityAnalyzesOfPoliceOpenData, + AnalyzingScientificSpreadsheets, + RepresentingEntitiesAndFactsExtractedFromText, + IntelligentlyPreviewingCSVFiles, + SupportingSemantic-basedRecommendations, + MakingSenseOfOtherPeoplesData, + PublicationOfBiodiversityInformation and + CollatingHumanitarianResponseInformation. +

+
Note
+

The property URL annotation provides the URI for the property relating the value of a given cell to its subject. [tabular-metadata] specifies how a URI Template, specified in [RFC6570], may be used to specify the property URL using the propertyURL property. This property is normally specified for the column and inherited by all the cells within that column.

+
+
+
R-MissingValueDefinition
+
+ + Ability to declare a "missing value" token and, optionally, a reason for the + value to be missing + +

Significant amounts of existing tabular text data include values such as + -999. Typically, these are outside the normal expected range of values + and are meant to imply that the value for that cell is missing. Automated parsing of + CSV files needs to recognise such missing value tokens and behave accordingly. + Furthermore, it is often useful for a data publisher to declare why a value + is missing; e.g. withheld or aboveMeasurementRange.

+

+ Motivation: + SurfaceTemperatureDatabank, + OrganogramData, + OpenSpendingData, + NetCdFcDl, + PaloAltoTreeData and + PlatformIntegrationUsingSTDF. +

+
Note
+

[tabular-data-model] defines the null annotation which defines the string or strings that, when matched to the literal content of a cell, cause the cell's value to be interpreted as null (or empty).

+
+
+
R-URIMapping
+
+ + Ability to map cell values within a given column into corresponding URI + +

Tabular data often makes use of local identifiers to uniquely identify an entity described within a tabular data file or to reference an entity described in the same data file or elsewhere (e.g. reference data, code lists, etc.). The local identifier will often be unique within a particular scope (e.g. a code list or data set), but cannot be guaranteed to be globally unique. In order to make these local identifiers globally unique (e.g. so that + the entity described by a row in a tabular data file can be referred to from an external source, or to establish links between the tabular data and the related reference data) it is necessary to map those local identifiers to URIs.

+

It shall be possible to declare how local identifiers used within a column of a particular dataset can be mapped to their respective URI. Typically, this may be achieved by concatenating the local identifier with a prefix - although more complex mappings are anticipated such as removal of "special characters" that are not permitted in URIs (as defined in [RFC3986]) or CURIEs [curie].

+

Furthermore, where the local identifier is part of a controlled vocabulary, code list or thesaurus, it should be possible to specify the URI for the controlled vocabulary within which the local identifier is defined.

+
Note

Also see the related requirement R-ForeignKeyReferences.

+

+ Motivation: + DigitalPreservationOfGovernmentRecords, + OrganogramData, + PublicationOfPropertyTransactionData, + AnalyzingScientificSpreadsheets, + RepresentingEntitiesAndFactsExtractedFromText, + PaloAltoTreeData, + PublicationOfBiodiversityInformation, + MakingSenseOfOtherPeoplesData and + ExpressingHierarchyWithinOccupationalListings. +

+
Note
+

The valueURL property from [tabular-metadata] specifies how a URI Template, as defined in [RFC6570], may be used to map literal contents of a cell to a URI. The result of evaluating the URI Template is stored in the value URL annotation for each cell.

+
+
+
R-UnitMeasureDefinition
+
+ + Ability to identify/express the unit of measure for the values reported in a given column. + +

Data from measurements is often published and exchanged as tabular data. In order for the values of those measurements to be correctly understood, it is essential that the unit of measurement associated with the values can be specified. For example, without specifying the unit of measurement as kilometers, the floating point value 21.5 in a column entitled distance is largely meaningless.

+

+ Motivation: + AnalyzingScientificSpreadsheets, + OpenSpendingData, + IntelligentlyPreviewingCSVFiles, + ChemicalImaging, + ChemicalStructures, + NetCdFcDl and + PaloAltoTreeData. +

+
Note
+

This specification provides no native mechanisms for expressing the unit of measurement associated with values of cells in a column.

+

However, annotations may be used to provide this additional information. The [tabular-data-primer] provides examples of how this might be achieved; from providing descriptive metadata for the column, to enabling transformation of cell values to structured data with unit of measurement properties.

+

Also note that the [vocab-data-cube] provides another alternative for annotations; structural metadata is used to provide the metadata required to interpret data values - such as the unit of measurement.

+
+
+
R-GroupingOfMultipleTables
+
+ + Ability to group multiple data tables into a single package for + publication + +

When publishing sets of related data tables, it shall be possible to provide annotation for the + group of related tables. Annotation concerning a group of tables may include summary + information about the composite dataset (or "group") that the individual tabular datasets belong to, + such as the license under which the dataset is made available.

+

The implication is that the group shall be identified as an entity + in its own right, thus enabling assertions to be made about that group. The relationship + between the group and the associated tabular datasets will need to be made explicit.

+

Furthermore, where appropriate, it shall be possible to describe the interrelationships + between the tabular datasets within the group.

+

The tabular datasets comprising a group need not be hosted at the same URL. As such, + a group does not necessarily need to be published as a single package (e.g. as a zip) - although we + note that this is a common method of publication.

+

+ Motivation: + PublicationOfNationalStatistics, + OrganogramData, + ChemicalStructures and + NetCdFcDl. +

+
Note
+

The group of tables, as defined in [tabular-data-model] is a first class entity within the tabular data model. A group of tables comprises a set of annotated tables and a set of annotations that relate to that group of tables.

+
+
+
R-MultilingualContent
+
+ + Ability to declare a locale / language for content in a specified column + +

Tabular data may contain literal values for a given property in multiple languages. For example, the name of a town in English, French and Arabic. It shall be possible to:

+
    +
  • specify the property for which the literal values are supplied; and
  • +
  • specify the language / locale relevant to all data values in a given column.
  • +
+

Additionally, it should be possible to provide supplementary labels for column headings in multiple languages.

+

+ Motivation: + CollatingHumanitarianResponseInformation. +

+
Note
+

The lang annotation, as defined in [tabular-data-model], may be used to express the code for the expected language for values of cells in a particular column. The language code is expressed in the format defined by [BCP47].

+

Furthermore, the titles annotation allows for any number of human-readable titles to be given for a column, each of which may have an associated language code as defined by [BCP47].

+
+
+
R-RepeatedProperties
+
+ + Ability to provide multiple values of a given property for a single entity described within a tabular data file + +

It is commonplace for a tabular data file to provide multiple values of a given property + for a single entity. This may be achieved in a number of ways.

+

First, multiple rows may be used to describe the same entity; each such row using the same + unique identifier for the entity. For example, a country, identified using its + two-letter country code, + may have more than one name:

+
Example 53
CSV:
+----
+
+country,name
+AD,     Andorra
+AD,     Principality of Andorra
+AF,     Afghanistan
+AF,     Islamic Republic of Afghanistan
+{snip}
+
+Equivalent JSON:
+----------------
+
+[{
+  "country": "AD",
+  "name": [ "Andorra", "Principality of Andorra" ]
+},{
+  "country": "AF",
+  "name": [ "Afghanistan", "Islamic Republic of Afghanistan" ]
+}]
+ +

Second, a single row within a tabular data set may contain multiple values for a given property + by declaring that multiple columns map to the same property. For example, multiple locations:

+ + +
Example 54
CSV:
+----
+
+geocode #1,geocode #2,geocode #3
+    020503,          ,
+    060107,    060108,
+    173219,          ,
+    530012,    530013,    530015
+    279333,          ,
+
+Equivalent RDF (in Turtle syntax):
+----------------------------------
+
+row:1 admingeo:gssCode ex:020503 .
+row:2 admingeo:gssCode ex:060107, ex:060108 .
+row:3 admingeo:gssCode ex:173219 .
+row:4 admingeo:gssCode ex:530012, ex:530013, ex:530015 .
+row:5 admingeo:gssCode ex:279333 .
+ +
Note
+

In this case, it is essential to declare that each of the columns refers to the same property. In the + example above, all the geocode columns map to + admingeo:gssCode.

+
+ +

Finally, microsyntax may provide a list of values within a single + cell. For example, a semi-colon ";" delimited list of comments about the characteristics + of a tree within a municipal database:

+ +
Example 55
CSV:
+----
+
+GID,Tree ID, On Street,From Street,To Street,             Species,[...],Comments
+  6,     34,ADDISON AV, EMERSON ST,RAMONA ST,Robinia pseudoacacia,[...],cavity or decay; trunk decay; codominant leaders; included bark; large leader or limb decay; previous failure root damage; root decay;  beware of BEES.
+{snip}
+
+Equivalent JSON:
+----------------
+
+[{
+  "GID": "6",
+  "Tree_ID": "34",
+  "On_Street": "ADDISON AV",
+  "From_Street": "EMERSON ST",
+  "To_Street": "RAMONA ST",
+  "Species": "Robinia pseudoacacia",
+  "Comments": [ "cavity or decay", "trunk decay", "codominant leaders", "included bark", "large leader or limb decay", "previous failure root damage", "root decay", "beware of BEES."]
+}]
+ +

Note that the example above is based on the Palo Alto tree data use case; albeit truncated for clarity.

+ +
Note

In writing this requirement, no assumption has been made regarding how the repeated values should be implemented in RDF, JSON or XML.

+ +

Motivation: + JournalArticleSearch, + PaloAltoTreeData, + SupportingSemantic-basedRecommendations and + CollatingHumanitarianResponseInformation. +

+ +
Note
+

Within an annotated table, the values of cells can be considered as RDF subject-predicate-object triples (see [rdf11-concepts]). The about URL annotation may be used to define the subject of the triple derived from a cell, and, where the same about URL annotation is used for every cell within a row, the resource identified by the about URL annotation can be considered to be the subject of the row.

+

The same about URL annotation can be used to describe cells in more than one row, thus enabling information about a single subject to be spread across multiple rows.

+

Similarly, the property URL annotation may be used to define the predicate of the triple derived from a cell. The same property URL annotation may be used for multiple columns, meaning that multiple values of a single property can be provided across multiple columns.

+

Finally, note that arrays of values may be provided by a single cell. Please refer to requirement R-CellMicrosyntax for further details.

+
+
+
+
+
+
+

3.2 Partially accepted requirements

+
+

3.2.1 Data model requirements

+
+
R-CellMicrosyntax
+
+ + Ability to parse internal data structure within a cell value + +

Cell values may represent more complex data structures for a given column such as lists and time stamps. + The presence of complex data structures within a given cell is referred to as microsyntax.

+

If present, parsers should have the option of handling the microsyntax or ignoring it and treating it as + a scalar value. +

+

Looking in further detail at the uses of microsyntax, four types of usage are prevalent:

+
    +
  1. various date/time syntaxes (not just ISO-8601 ones)
  2. +
  3. delimited lists of literal values to express multiple values of the same property + (typically comma "," delimited, but other delimiters are also used)
  4. +
  5. embedded structured data such as XML, JSON or + well-known-text (WKT) literals
  6. +
  7. semi structured text
  8. +
+

The following requirements pertain to describing and parsing microsyntax:

+
    +
  • to document microsyntax so that humans can understand + what it is conveying; e.g. to provide human-readable annotation
  • +
  • to validate the cell values to ensure they conform to the expected microsyntax
  • +
  • to label the value as being in a particular microsyntax when converting into + JSON/XML/RDF; e.g. marking an XML value as an XMLLiteral or a datetime + value as xsd:dateTime
  • +
  • to process the microsyntax into an appropriate data structure when converting + into JSON/XML/RDF
  • +
+

The ability to declare that a column within a tabular data file carries values of a + particular type, and the potential validation of the cell against the declared type, + is covered in R-SyntacticTypeDefinition + and is not discussed further here.

+ +

We can consider cell values with microsyntax to be annotated strings. The annotation (which might + include a definition of the format of the string - such as defining the delimiter used for a list) + can be used to validate the string and (in some cases) convert it into a suitable value or data + structure.

+

Microsyntax, therefore, requires manipulation of the text if processed. Typically, this will + relate to conversion of lists into multiple-valued entries, but may also include reformatting of + text to convert between formats (e.g. to convert a datetime value to a date, or locale dates to + ISO 8601 compliant syntax).

+ +

Motivation: + JournalArticleSearch, + PaloAltoTreeData, + SupportingSemantic-basedRecommendations, + ExpressingHierarchyWithinOccupationalListings and + PlatformIntegrationUsingSTDF. +

+ +
Note
+

This specification indicates how applications should provide support for validating the format, or syntax, of the literal content provided in cells. [tabular-data-model] section 6.4 Parsing Cells describes validation of formats for numeric datatypes, boolean, dates, times, and durations.

+

Please refer to R-SyntacticTypeDefinition for details of the associated requirement.

+

A regular expression, with syntax and processing as defined in [ECMASCRIPT], may be used to validate the format of a string value. In this way, the syntax of embedded structured data (e.g. html, json, xml and well known text literals) can be validated.

+

However, support for the extraction of values from structured data is limited to parsing the cell content to extract an array of values. Parsers must use the value of the separator annotation, as specified in [tabular-data-model], to split the literal content of the cell. All values within the array are considered to be of the same datatype.

+

This functionality meets the needs of 4 out of 5 motivating requirements:

+
    +
  • JournalArticleSearch: date-time formats dealt with as a native datatype and the list of authors is treated as an array. The journal title does contain html markup (e.g. the <i> html element) but the use case indicates that it is acceptable to treat this as literal text.
  • +
  • PaloAltoTreeData: list of comments delimited with semi-colon (";") are mapped to an array of values.
  • +
  • SupportingSemantic-basedRecommendations: the 'semantic paths' are a comma delimited list of URIs which are mapped to an array of values. The use case does not indicate that different semantics need to be applied to each value in the array.
  • +
  • PlatformIntegrationUsingSTDF: escape sequences for 'special characters' are not supported, but the use case indicates that "these special characters don't affect the parsing" so are considered not to be a microsyntax from which separate data values are to be extracted.
  • +
+

This specification does not natively meet the requirement to extract values from other structured data formats; the Working Group deemed this to add significant complexity to both specification and conforming applications.

+

That said, an annotated table may specify transformations which define a list of specifications for converting the associated annotated table into other formats using a script or template such as Mustache. These scripts or templates may be used to extract values from structured data, operating on the annotated table itself, the RDF graph provided from transforming the annotated table into RDF using standard mode (as specified in [csv2rdf]), or the JSON provided when using the standard mode specified in [csv2json]. Transformation specifications are defined in [tabular-metadata] section 5.10 Transformation Definitions.

+

Use case ExpressingHierarchyWithinOccupationalListings requires the extraction of values from substrings within cell values (e.g. different parts of the structured occupation code). Such processing may be achievable using scripts or templates which can be specified using a transformation definition.

+
+
+
R-CsvAsSubsetOfLargerDataset
+
+ + Ability to assert how a single CSV file is a facet or subset of a larger + dataset + +

A large tabular dataset may be split into several files for publication; perhaps to ensure that + each file is a manageable size or to publish the updates to a dataset during the (re-)publishing cycle. It shall + be possible to declare that each of the files is part of the larger dataset and to describe what + content can be found within each file in order to allow users to rapidly find the particular file + containing the information they are interested in.

+

+ Motivation: + SurfaceTemperatureDatabank, + PublicationOfPropertyTransactionData, + JournalArticleSearch, + ChemicalImaging and + NetCdFcDl. +

+
Note
+

This specification provides only a simple grouping mechanism to relate annotated tables, as described in [tabular-data-model] section 4.1 Table groups. Large tabular datasets may be subdivided into smaller parts for easier management. Each of the smaller parts may be related to each other using a group of tables.

+

However, no mechanism is provided for describing the relationship between tables other than simple grouping. Other specifications, such as [vocab-data-cube] and [void], provide mechanisms to describe subsets of data that can be used to meet this requirement. Such descriptions can be included as metadata annotations in the form of notes.

+
+
+
+
+
+
+

3.3 Deferred requirements

+
+

3.3.1 CSV parsing requirements

+
+
R-WellFormedCsvCheck
+
+ + Ability to determine that a CSV is syntactically well formed + +

In order to automate the parsing of information published in CSV form, it is + essential that that content be well-formed with respect to the + syntax for tabular data [tabular-data-model].

+

+ Motivation: + DigitalPreservationOfGovernmentRecords, + OrganogramData, + ChemicalImaging, + ChemicalStructures, + NetCdFcDl, + PaloAltoTreeData, + CanonicalMappingOfCSV, + IntelligentlyPreviewingCSVFiles, + MakingSenseOfOtherPeoplesData and + ConsistentPublicationOfLocalAuthorityData. +

+
Note

+ This requirement has been deferred as normative specification for parsing CSV is outside the scope of the Working Group charter. [tabular-data-model] does provide non-normative definition of parsing of CSV files, including flexibility to parse tabular data that does not use commas as separators. +

+
+
R-MultipleHeadingRows
+
+ + Ability to handle headings spread across multiple initial rows, as well as to distinguish between single column headings and file headings. + +

Row headings should be distinguished from file headings (if present). Also, in case subheadings are present, it should be possible to define their coverage + (i.e. how many columns they refer to).

+

+ Motivation: + PublicationOfNationalStatistics, + AnalyzingScientificSpreadsheets, + IntelligentlyPreviewingCSVFiles, + CollatingHumanitarianResponseInformation, + ExpressingHierarchyWithinOccupationalListings and + PlatformIntegrationUsingSTDF. +

+
Note

+ The Working Group decided to rule headings spanning multiple columns out of scope. However, it is possible to skip initial rows that do not contain header information using skipRows and to specify that a table contains multiple header rows using headerRowCount when describing a dialect, as described in [tabular-metadata]. +

+
+
R-TableNormalization
+
+ + Ability to transform data that is published in a normalized form into tabular data. + +

Textual data may be published in a normalized form; often improving human readability by reducing the number of lines in the data file. As a result, such a normalized data file will no longer be regular as additional information is included in each row (e.g., the number of columns will vary because more cells are provided for some rows).

+
Note
+

Use of the term normalized is meant in a general sense, rather than the specific meaning relevant to relational databases.

+
+

Such a normalized data file must be transformed into a tabular data file, as defined by the model for tabular data [tabular-data-model], prior to applying any further transformation.

+

+ Motivation: + RepresentingEntitiesAndFactsExtractedFromText. +

+
Note
+

The motivating use case is an example where we have a CSV file that is not well-formed - in this particular case, the number of columns varies row by row and therefore does not conform to the model for tabular data [tabular-data-model].

+

The ability to transform a data file into a tabular data file is a necessary prerequisite for any subsequent transformation. That said, such a transformation is outside the scope of this Working Group as it requires parsing a data file with any structure.

+

Such pre-processing to create a tabular data file from a given structure is + likely to be reasonably simple for a programmer to implement, but it cannot be generalised.

+
+
+
+
+
+

3.3.2 Applications requirements

+
+
R-RandomAccess
+
+ + Ability to access and/or extract part of a CSV file in a non-sequential manner. + +

Large datasets may be hard to process in a sequential manner. It may be useful to have the possibility to directly access part of them, possibly by means of a pointer to a given row, cell or region.

+

+ Motivation: + SupportingSemantic-basedRecommendations. +

+
Note

+ A standardised mechanism for querying tabular data is outside the scope of the Working Group. However, it is possible to use fragment identifiers as defined in [RFC7111] to identify columns, rows, cells, and regions of CSV files, and sufficient information is kept in the tabular data model to ensure that this ability is retained. +

+
+
R-CsvToXmlTransformation
+
+ + Ability to transform a CSV into XML + +

Standardised CSV to XML transformation mechanisms mitigate the need for bespoke transformation software to be developed by CSV data consumers, + thus simplifying the exploitation of CSV data.

+

+ Motivation: + DigitalPreservationOfGovernmentRecords. +

+
Note
+

Although the charter of the Working Group includes a work item for CSV to XML conversion, this requirement has unfortunately been deferred. The Working Group was unable to find XML experts to assist in delivery of this work item. The lack of available effort combined with motivation for this requirement being provided by a single use case only meant that the Working Group was forced to abandon this deliverable.

+
+
+
R-ConditionalProcessingBasedOnCellValues
+
+ + Ability to apply conditional processing based on the value of a specific cell + +

When transforming CSV content into XML, JSON or RDF it shall be possible to vary the transformation of the information in a particular row based on the values within a cell, or element within a cell, contained within that row.

+

To vary the transformation based on an element within a cell, the value of that cell must be well structured. See CellMicrosyntax for more information.

+

+ Motivation: + ExpressingHierarchyWithinOccupationalListings. +

+
Note
+

The ability to control the processing of tabular data based on values in a particular cell is not natively supported by this specification. Following detailed analysis, the Working Group concluded that such functionality would add significant complexity to the specification and implementing applications. However, an annotated table may specify transformations which define a list of specifications for converting the associated annotated table into other formats using a script or template such as Mustache. These scripts or templates may be used to provide conditional processing, operating on the annotated table itself, the RDF graph provided from transforming the annotated table into RDF using standard mode (as specified in [csv2rdf]), or the JSON provided when using the standard mode specified in [csv2json]. Transformation specifications are defined in [tabular-metadata] section 5.10 Transformation Definitions.

+
+
+
+
+
+
+ +
+

A. Acknowledgements

+
At the time of publication, the following individuals had participated in the Working Group, in the order of their first name: + Adam Retter, + Alf Eaton, + Anastasia Dimou, + Andy Seaborne, + Axel Polleres, + Christopher Gutteridge, + Dan Brickley, + Davide Ceolin, + Eric Stephan, + Erik Mannens, + Gregg Kellogg, + Ivan Herman, + Jeni Tennison, + Jeremy Tandy, + Jürgen Umbrich, + Rufus Pollock, + Stasinos Konstantopoulos, + William Ingram, and + Yakov Shafranovich. +
+
+ +
+

B. Changes since previous versions

+
+

B.1 Changes since working draft of 01 July 2014

+ +
+
+

B.2 Changes since first public working draft of 27 March 2014

+ +
+
+ + +

C. References

C.1 Normative references

[BCP47]
A. Phillips; M. Davis. Tags for Identifying Languages. September 2009. IETF Best Current Practice. URL: https://tools.ietf.org/html/bcp47 +
[csv2json]
Jeremy Tandy; Ivan Herman. Generating JSON from Tabular Data on the Web. 17 December 2015. W3C Recommendation. URL: http://www.w3.org/TR/csv2json/ +
[csv2rdf]
Jeremy Tandy; Ivan Herman; Gregg Kellogg. Generating RDF from Tabular Data on the Web. 17 December 2015. W3C Recommendation. URL: http://www.w3.org/TR/csv2rdf/ +
[tabular-data-model]
Jeni Tennison; Gregg Kellogg. Model for Tabular Data and Metadata on the Web. 17 December 2015. W3C Recommendation. URL: http://www.w3.org/TR/tabular-data-model/ +
[tabular-metadata]
Jeni Tennison; Gregg Kellogg. Metadata Vocabulary for Tabular Data. 17 December 2015. W3C Recommendation. URL: http://www.w3.org/TR/tabular-metadata/ +

C.2 Informative references

[ECMASCRIPT]
ECMAScript Language Specification. URL: https://tc39.github.io/ecma262/ +
[RFC3986]
T. Berners-Lee; R. Fielding; L. Masinter. Uniform Resource Identifier (URI): Generic Syntax. January 2005. Internet Standard. URL: https://tools.ietf.org/html/rfc3986 +
[RFC4180]
Y. Shafranovich. Common Format and MIME Type for Comma-Separated Values (CSV) Files. October 2005. Informational. URL: https://tools.ietf.org/html/rfc4180 +
[RFC6570]
J. Gregorio; R. Fielding; M. Hadley; M. Nottingham; D. Orchard. URI Template. March 2012. Proposed Standard. URL: https://tools.ietf.org/html/rfc6570 +
[RFC7111]
M. Hausenblas; E. Wilde; J. Tennison. URI Fragment Identifiers for the text/csv Media Type. January 2014. Informational. URL: https://tools.ietf.org/html/rfc7111 +
[RFC7159]
T. Bray, Ed.. The JavaScript Object Notation (JSON) Data Interchange Format. March 2014. Proposed Standard. URL: https://tools.ietf.org/html/rfc7159 +
[curie]
Mark Birbeck; Shane McCarron. CURIE Syntax 1.0. 16 December 2010. W3C Note. URL: http://www.w3.org/TR/curie +
[geosparql]
OGC GeoSPARQL - A Geographic Query Language for RDF Data. OpenGIS Implementation Specification. URL: https://portal.opengeospatial.org/files/?artifact_id=47664 +
[json-ld]
Manu Sporny; Gregg Kellogg; Markus Lanthaler. JSON-LD 1.0. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/json-ld/ +
[rdf11-concepts]
Richard Cyganiak; David Wood; Markus Lanthaler. RDF 1.1 Concepts and Abstract Syntax. 25 February 2014. W3C Recommendation. URL: http://www.w3.org/TR/rdf11-concepts/ +
[tabular-data-primer]
Jeni Tennison. CSV on the Web: A Primer. W3C Note. URL: http://www.w3.org/TR/2016/NOTE-tabular-data-primer-20160225/ +
[turtle]
Eric Prud'hommeaux; Gavin Carothers. RDF 1.1 Turtle. 25 February 2014. W3C Recommendation. URL: http://www.w3.org/TR/turtle/ +
[vocab-data-cube]
Richard Cyganiak; Dave Reynolds. The RDF Data Cube Vocabulary. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/vocab-data-cube/ +
[void]
Keith Alexander; Richard Cyganiak; Michael Hausenblas; Jun Zhao. Describing Linked Datasets with the VoID Vocabulary. 3 March 2011. W3C Note. URL: http://www.w3.org/TR/void/ +
[xml]
Tim Bray; Jean Paoli; Michael Sperberg-McQueen; Eve Maler; François Yergeau et al. Extensible Markup Language (XML) 1.0 (Fifth Edition). 26 November 2008. W3C Recommendation. URL: http://www.w3.org/TR/xml +
[xmlschema11-2]
David Peterson; Sandy Gao; Ashok Malhotra; Michael Sperberg-McQueen; Henry Thompson; Paul V. Biron et al. W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes. 5 April 2012. W3C Recommendation. URL: http://www.w3.org/TR/xmlschema11-2/ +
\ No newline at end of file diff --git a/test/docs/metadata/mediacapture-depth.html b/test/docs/metadata/mediacapture-depth.html new file mode 100644 index 000000000..fac5bef61 --- /dev/null +++ b/test/docs/metadata/mediacapture-depth.html @@ -0,0 +1,1460 @@ + + + + + Media Capture Depth Stream Extensions + + + + + + +

Abstract

+

+ This specification extends + the Media Capture and Streams specification [GETUSERMEDIA] + to allow a depth-only stream or combined depth+video + stream to be requested from the web platform using APIs familiar to + web authors. +

+

Status of This Document

+ + + +

+ This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the W3C technical reports index at http://www.w3.org/TR/. +

+ + + +

+ This extensions specification defines a new media type and + constrainable property per Extensibility + guidelines of the Media Capture and Streams specification + [GETUSERMEDIA]. Horizontal reviews and feedback from early + implementations of this specification are encouraged. +

+ + + +

+ This document was published by the Device APIs Working Group and the Web Real-Time Communications Working Group as a Working Draft. + + This document is intended to become a W3C Recommendation. + + + If you wish to make comments regarding this document, please send them to + public-media-capture@w3.org + (subscribe, + archives). + + + + + + + All comments are welcome. + + +

+ + + + +

+ Publication as a Working Draft does not imply endorsement by the W3C + Membership. This is a draft document and may be updated, replaced or obsoleted by other + documents at any time. It is inappropriate to cite this document as other than work in + progress. +

+ + + +

+ + This document was produced by + + groups + operating under the + 5 February 2004 W3C Patent + Policy. + + + + + W3C maintains a public list of any patent disclosures (Device APIs Working Group) and a public list of any patent disclosures (Web Real-Time Communications Working Group) + + made in connection with the deliverables of + + each group; these pages also include + + instructions for disclosing a patent. An individual who has actual knowledge of a patent + which the individual believes contains + Essential + Claim(s) must disclose the information in accordance with + section + 6 of the W3C Patent Policy. + + +

+ +

This document is governed by the 1 September 2015 W3C Process Document. +

+ + + + + + +

Table of Contents

+ +
+

1. + Introduction +

+

+ Depth cameras are increasingly being integrated into devices such as + phones, tablets, and laptops. Depth cameras provide a depth map, + which conveys the distance information between points on an object's + surface and the camera. With depth information, web content and + applications can be enhanced by, for example, the use of hand gestures + as an input mechanism, or by creating 3D models of real-world objects + that can interact and integrate with the web platform. Concrete + applications of this technology include more immersive gaming + experiences, more accessible 3D video conferences, and augmented + reality, to name a few. +

+

+ To bring depth capability to the web platform, this specification + extends + the MediaStream interface [GETUSERMEDIA] to + enable it to also contain depth-based + MediaStreamTracks. A depth-based + MediaStreamTrack, referred to as a depth stream + track, represents an abstraction of a stream of frames that can + each be converted to objects which contain an array of pixel data, + where each pixel represents the distance between the camera and the + objects in the scene for that point in the array. A + MediaStream object that contains one or more + depth stream tracks is referred to as a depth-only stream + or depth+video stream. +

+

+ Depth cameras usually produce 16-bit depth values per pixel. However, + neither the canvas drawing surface used to draw and manipulate 2D + graphics on the web platform nor the ImageData + interface used to represent image data support 16 bits per pixel. To + address the issue, this specification defines a conversion into an 8-bit + grayscale representation of a depth map for consumption by APIs + that are limited to 8 bits per pixel. +

+

+ The Media Capture Stream with Worker specification + [MEDIACAPTURE-WORKER] that complements this specification enables + processing of 16-bit depth values per pixel directly in a worker + environment and makes the <video> and + <canvas> indirection and depth-to-grayscale + conversion redundant. This alternative pipeline that supports greater + bit depth and does not incur the performance penalty of the indirection + and conversion enables more advanced use cases. +

+
+
+

2. + Use cases and requirements +

+

+ This specification attempts to address the Use + Cases and Requirements for accessing depth stream from a depth + camera. See also the + Examples section for concrete usage examples. +

+
+

3. Conformance

+

+ As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, + and notes in this specification are non-normative. Everything else in this specification is + normative. +

+

The key words MUST and MUST NOT are + to be interpreted as described in [RFC2119]. +

+ +

+ This specification defines conformance criteria that apply to a single + product: the user agent that implements the interfaces that + it contains. +

+

+ Implementations that use ECMAScript to implement the APIs defined in + this specification must implement them in a manner consistent with the + ECMAScript Bindings defined in the Web IDL specification [WEBIDL], + as this specification uses that specification and terminology. +

+
+
+

4. + Dependencies +

+

+ The + MediaStreamTrack and + MediaStream interfaces this specification extends + are defined in [GETUSERMEDIA]. +

+

+ The + Constraints, + MediaStreamConstraints, + MediaTrackSettings, + MediaTrackConstraints, + MediaTrackSupportedConstraints, + MediaTrackCapabilities, and + MediaTrackConstraintSet dictionaries this + specification extends are defined in [GETUSERMEDIA]. +

+

+ The + getUserMedia(), applyConstraints(), + getSettings() + methods and the + NavigatorUserMediaSuccessCallback callback are + defined in [GETUSERMEDIA]. +

+

+ The concepts muted + and disabled + as applied to MediaStreamTrack are defined in [GETUSERMEDIA]. +

+

+ The terms source + and consumer + are defined in [GETUSERMEDIA]. +

+

+ The SourceTypeEnum + and + MediaDeviceKind enumerations are defined in + [GETUSERMEDIA]. +

+

+ The ImageData + and VideoTrack + interfaces are defined in [HTML]. +

+
+
+

5. + Terminology +

+

+ The term depth+video stream means a MediaStream + object that contains one or more MediaStreamTrack objects of + kind "depth" (depth stream track) and one or more + MediaStreamTrack objects of kind "video" (video + stream track). +

+

+ The term depth-only stream means a MediaStream object + that contains one or more MediaStreamTrack objects of kind + "depth" (depth stream track) only. +

+

+ The term video-only stream means a MediaStream object + that contains one or more MediaStreamTrack objects of kind + "video" (video stream track) only, and optionally + of kind "audio". +

+

+ The term depth stream track means a MediaStreamTrack + object whose kind is "depth". It represents a media stream + track whose source is a depth camera. +

+

+ The term video stream track means a MediaStreamTrack + object whose kind is "video". It represents a media stream + track whose source is a video camera. +

+
+

5.1 + Depth map +

+

+ A depth map is an abstract representation of a frame of a + depth stream track. A depth map is an image that + contains information relating to the distance of the surfaces of + scene objects from a viewpoint. +

+

+ A depth map has an associated focal length which is + a double. It represents the focal length of the camera in + millimeters. +

+

+ A depth map has an associated horizontal field of + view which is a double. It represents the horizontal angle of + view in degrees. +

+

+ A depth map has an associated vertical field of + view which is a double. It represents the vertical angle of + view in degrees. +

+

+ A depth map has an associated unit which is a + string. It represents the active depth map unit. +

+

+ A depth map has an associated near value which is a + double. It represents the minimum range in active depth map + units. +

+

+ A depth map has an associated far value which is a + double. It represents the maximum range in active depth map + units. +

+
+
+
+

6. + Extensions +

+
+

6.1 + MediaStreamConstraints dictionary +

+
partial dictionary MediaStreamConstraints {
+    (boolean or MediaTrackConstraints) depth = false;
+};
+

+ If the depth dictionary member has the value + true, the MediaStream returned by the getUserMedia() + method MUST contain a depth stream track. If the depth + dictionary member is set to false, is not provided, or is set to + null, the MediaStream MUST NOT contain a depth stream + track. +

+

+ If active depth map unit is provided in + MediaTrackConstraints, let that unit be the active depth + map unit for the returned depth stream track. +

+
Note
+ If the user agent requests a combined depth+video stream, the + devices in the constraint should be satisfied as belonging to the + same group or physical device. The decision to select and satisfy + which device pair is left up to the implementation. +
+
+
+

6.2 + MediaStream interface +

+
partial interface MediaStream {
+    sequence<MediaStreamTrack> getDepthTracks();
+};
+

+ The getDepthTracks() method, when invoked, + MUST return a sequence of depth + stream tracks in this stream. +

+

+ The getDepthTracks() method MUST return a + sequence that represents a snapshot of all the + MediaStreamTrack objects in this stream's track + set whose kind is equal to "depth". + The conversion from the track set to the sequence is user + agent defined and the order does not have to be stable between + calls. +

+

+ The MediaStream consumer for the depth-only + stream and depth+video stream is the video element [HTML]. +

+
Note
+ New consumers may be added in a future version of this + specification. +
+
+

6.2.1 + Implementation considerations +

This section is non-normative.

+

+ A video stream track and a depth stream track can be + combined into one depth+video stream. The rendering of the + two tracks is intended to be synchronized. The resolution of the + two tracks is intended to be the same. And the coordination of the two + tracks is intended to be calibrated. These are not hard + requirements, since it might not be possible to synchronize tracks + from sources. +

+
+
+
+

6.3 + MediaStreamTrack interface +

+

+ The kind attribute MUST, on getting, return + the string "depth" if the object represents a depth + stream track. +

+

+ If a MediaStreamTrack of kind "depth" is + muted or disabled, it MUST render black frames, or a + zero-information-content equivalent. +

+

+ The string "depth" is the SourceTypeEnum value + for the source that is a local depth camera source. +

+
+
+

6.4 + MediaDeviceInfo interface +

+

+ The string "depthinput" is the MediaDeviceKind + value for the depth camera input device. +

+
+
+

6.5 + Media provider object +

+

+ A media + provider object can represent a depth-only stream (and + specifically, not a depth+video stream). The user agent + MUST support a media element with + an assigned + media provider object that is a depth-only stream, and in + particular, the srcObject + IDL attribute that allows the media element to be + assigned a media provider + object MUST, on setting and getting, behave as specified in + [HTML]. +

+
+
+

6.6 + The video element +

+

+ For a video + element whose assigned + media provider object is a depth-only stream, the user + agent MUST, for each pixel of the media data that is + represented by a depth map, convert the depth map value to + grayscale prior to when the video element is + potentially + playing. +

+

+ For a video + element whose assigned + media provider object is a depth+video stream, the user + agent MUST act as if all the MediaStreamTracks of kind + "depth" were removed prior to when the + video element is potentially + playing. +

+

+ The algorithm to convert the depth map value to grayscale, + given a depth map value d, is as follows: +

+
    +
  1. Let bit depth be the bit depth of the depth map. +
  2. +
  3. Let near be the near value. +
  4. +
  5. Let far be the far value. +
  6. +
  7. If bit depth is greater than 8, then apply the + rules to convert using range inverse to d to obtain + quantized value d8bit. +
  8. +
  9. Otherwise, apply the rules to convert using range linear + to d to obtain quantized value + d8bit. +
  10. +
  11. Return d8bit. +
  12. +
+

+ The rules to convert using range inverse are as given in + the following formula: +

Range inverse
+ Quantization +

+ The rules to convert using range linear are as given in + the following formula: +

Range linear
+ Quantization +
+

6.6.1 + VideoTrack interface +

+

+ For each depth stream track in the depth-only stream, + the user agent MUST create a corresponding VideoTrack + as defined in [HTML]. +

+
+
+
+

6.7 + MediaTrackSettings dictionary +

+

+ When the getSettings() method is invoked on a depth stream + track, the user agent MUST return the following dictionary + that extends the MediaTrackSettings dictionary: +

+
enum RangeFormat {
+    "inverse",
+    "linear"
+};
+
+partial dictionary MediaTrackSettings {
+    double        focalLength;
+    RangeFormat   format;
+    double        horizontalFieldOfView;
+    double        verticalFieldOfView;
+    DepthMapUnit? depthMapUnit;
+    double        near;
+    double        far;
+};
+
+

+ The focalLength dictionary member + represents the depth map's focal length. +

+

+ The format dictionary member represents the + depth to grayscale conversion method applied to the depth + map in the convert the depth map value to grayscale + algorithm. The RangeFormat enumeration represents the + possible values. If the value is "inverse", the rules to convert using range + inverse have been applied, and if the value is "linear", the rules to convert using range + linear have been applied. +

+

+ The horizontalFieldOfView dictionary member + represents the depth map's horizontal field of view. +

+

+ The verticalFieldOfView dictionary member + represents the depth map's vertical field of view. +

+

+ The depthMapUnit dictionary member represents + the active depth map unit. +

+

+ The near dictionary member represents the + depth map's near value. +

+

+ The far dictionary member represents the + depth map's far value. +

+
+
+
+

6.8 + WebGLRenderingContext interface +

+
+

6.8.1 + Implementation considerations +

This section is non-normative.

+

+ A video element whose source is a + MediaStream object containing a depth stream + track may be uploaded to a WebGL texture of format + RGB and type UNSIGNED_BYTE. [WEBGL] +

+

+ For each pixel of this WebGL texture, the R component represents + the lower 8 bits of the 16-bit depth value, the G component + represents the upper 8 bits of the 16-bit depth value, and the + value in the B component is not defined. +

+
+
+
+

6.9 + depthMapUnit constrainable property +

+

+ The depthMapUnit constrainable property is defined to apply + only to depth stream tracks. +

+ + + + + + + + + + + + + + + +
+ Property name + + Values + + Notes +
+ depthMapUnit + + DOMString + + This property is used for setting the initial active depth + map unit when the getUserMedia() method is invoked, + and is not applicable for subsequent media control. +
+

+ The applyConstraints() method MUST reject the promise with + OverconstrainedError, when invoked with + depthMapUnit property. +

+
enum DepthMapUnit {
+    "mm",
+    "m"
+};
+

+ The DepthMapUnit enumeration represents the + possible units for a depth map. The "mm" value indicates millimeters, the "m" value indicates meters. +

+
partial dictionary MediaTrackConstraints {
+    DepthMapUnit unit = "mm";
+};
+
+partial dictionary MediaTrackConstraintSet {
+    ConstrainBoolean unit;
+};
+

+ The depthMapUnit of MediaTrackConstraints + is said to be the active depth map unit for the depth + stream track, when getUserMedia() invocation has + succeeded. +

+
partial dictionary MediaTrackSupportedConstraints {
+    boolean unit = true;
+};
+
+partial dictionary MediaTrackCapabilities {
+    DepthMapUnit unit;
+};
+
+
+
+

7. + Examples +

This section is non-normative.

+

+ Playback of depth+video stream +

+
Example 1
navigator.mediaDevices.getUserMedia({
+  depth: true,
+  video: true
+}).then(function (stream) {
+    // Wire the media stream into a <video> element for playback.
+    // The RGB video is rendered.
+    var video = document.querySelector('#video');
+    video.srcObject = stream;
+    video.play();
+
+    // Construct a depth-only stream out of the existing depth stream track.
+    var depthOnlyStream = new MediaStream(s.getDepthTracks()[0]);
+
+    // Wire the depth-only stream into another <video> element for playback.
+    // The depth information is rendered in its grayscale representation.
+    var depthVideo = document.querySelector('#depthVideo');
+    depthVideo.srcObject = depthOnlyStream;
+    depthVideo.play();
+  }
+);
+

+ WebGL Fragment Shader based post-processing +

+
Example 2
// This code sets up a video element from a depth stream, uploads it to a WebGL
+// texture, and samples that texture in the fragment shader, reconstructing the
+// 16-bit depth values from the red and green channels.
+navigator.mediaDevices.getUserMedia({
+  depth: true,
+}).then(function (stream) {
+  // wire the stream into a <video> element for playback
+  var depthVideo = document.querySelector('#depthVideo');
+  depthVideo.srcObject = stream;
+  depthVideo.play();
+}).catch(function (reason) {
+  // handle gUM error here
+});
+
+// ... later, in the rendering loop ...
+gl.texImage2D(
+   gl.TEXTURE_2D,
+   0,
+   gl.RGB,
+   gl.RGB,
+   gl.UNSIGNED_BYTE,
+   depthVideo
+);
+
+<script id="fragment-shader" type="x-shader/x-fragment">
+  varying vec2 v_texCoord;
+  // u_tex points to the texture unit containing the depth texture.
+  uniform sampler2D u_tex;
+  uniform float far;
+  uniform float near;
+  uniform bool isRangeInverse;
+  void main() {
+    vec4 floatColor = texture2D(u_tex, v_texCoord);
+    float dn = floatColor.r;
+    float depth = 0.;
+    if (isRangeInverse) {
+      depth = far * near / ( far - dn * ( far - near));
+    } else {
+      // Otherwise, using range linear
+      depth = dn * ( far - near ) + near;
+    }
+    // ...
+  }
+</script>
+
+
+

8. + Privacy and security considerations +

This section is non-normative.

+

+ The + privacy and security considerations discussed in [GETUSERMEDIA] + apply to this extension specification. +

+
+
+

A. + Acknowledgements +

+

+ Thanks to everyone who contributed to the Use + Cases and Requirements, sent feedback and comments. Special thanks + to Ningxin Hu for experimental implementations, as well as to the + Project Tango for their experiments. +

+
+ + +

B. References

B.1 Normative references

[GETUSERMEDIA]
Daniel Burnett; Adam Bergkvist; Cullen Jennings; Anant Narayanan. Media Capture and Streams. 14 April 2015. W3C Last Call Working Draft. URL: http://www.w3.org/TR/mediacapture-streams/ +
[HTML]
Ian Hickson. HTML Standard. Living Standard. URL: https://html.spec.whatwg.org/multipage/ +
[RFC2119]
S. Bradner. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 +
[WEBIDL]
Cameron McCormack; Boris Zbarsky. WebIDL Level 1. 4 August 2015. W3C Working Draft. URL: http://www.w3.org/TR/WebIDL-1/ +

B.2 Informative references

[MEDIACAPTURE-WORKER]
Chia-hung Tai; Robert O'Callahan; Tzuhao Kuo; Anssi Kostiainen. Media Capture Stream with Worker. W3C Editor's Draft. URL: https://w3c.github.io/mediacapture-worker/ +
[WEBGL]
Chris Marrin (Apple Inc.). WebGL Specification, Version 1.0. 10 February 2011. URL: https://www.khronos.org/registry/webgl/specs/1.0/ +
diff --git a/test/docs/metadata/tabular-data-model.html b/test/docs/metadata/tabular-data-model.html new file mode 100644 index 000000000..21d6fb931 --- /dev/null +++ b/test/docs/metadata/tabular-data-model.html @@ -0,0 +1,3049 @@ + + + + + + Model for Tabular Data and Metadata on the Web + + + + + + + + +

Abstract

+

+ Tabular data is routinely transferred on the web in a variety of formats, including variants on CSV, tab-delimited files, fixed field formats, spreadsheets, HTML tables, and SQL dumps. This document outlines a data model, or infoset, for tabular data and metadata about that tabular data that can be used as a basis for validation, display, or creating other formats. It also contains some non-normative guidance for publishing tabular data as CSV and how that maps into the tabular data model. +

+

+ An annotated model of tabular data can be supplemented by separate metadata about the table. This specification defines how implementations should locate that metadata, given a file containing tabular data. The standard syntax for that metadata is defined in [tabular-metadata]. Note, however, that applications may have other means to create annotated tables, e.g., through some application specific API-s; this model does not depend on the specificities described in [tabular-metadata]. +

+

Status of This Document

+ + + +

+ This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the W3C technical reports index at http://www.w3.org/TR/. +

+ + + +

+ The CSV on the Web Working Group was chartered to produce a recommendation "Access methods for CSV Metadata" as well as recommendations for "Metadata vocabulary for CSV data" and "Mapping mechanism to transforming CSV into various formats (e.g., RDF, JSON, or XML)". This document aims to primarily satisfy the "Access methods for CSV Metadata" recommendation (see section 5. Locating Metadata), though it also specifies an underlying model for tabular data and is therefore a basis for the other chartered Recommendations. +

+

+ The definition of CSV used in this document is based on IETF's [RFC4180] which is an Informational RFC. The working group's expectation is that future suggestions to refine RFC 4180 will be relayed to the IETF (e.g. around encoding and line endings) and contribute to its discussions about moving CSV to the Standards track. +

+

+ Many files containing tabular data embed metadata, for example in lines before the header row of an otherwise standard CSV document. This specification does not define any formats for embedding metadata within CSV files, aside from the titles of columns in the header row which is defined in CSV. We would encourage groups that define tabular data formats to also define a mapping into the annotated tabular data model defined in this document. +

+ + +

+ This document was published by the CSV on the Web Working Group as a Recommendation. + + + If you wish to make comments regarding this document, please send them to + public-csv-wg@w3.org + (subscribe, + archives). + + + + + + + All comments are welcome. + + +

+ +

+ Please see the Working Group's implementation + report. +

+ + + + +

+ This document has been reviewed by W3C Members, by software developers, and by other W3C + groups and interested parties, and is endorsed by the Director as a W3C Recommendation. + It is a stable document and may be used as reference material or cited from another + document. W3C's role in making the Recommendation is to draw attention to the + specification and to promote its widespread deployment. This enhances the functionality + and interoperability of the Web. +

+ + +

+ + This document was produced by + + a group + operating under the + 5 February 2004 W3C Patent + Policy. + + + + + W3C maintains a public list of any patent + disclosures + + made in connection with the deliverables of + + the group; that page also includes + + instructions for disclosing a patent. An individual who has actual knowledge of a patent + which the individual believes contains + Essential + Claim(s) must disclose the information in accordance with + section + 6 of the W3C Patent Policy. + + +

+ +

This document is governed by the 1 September 2015 W3C Process Document. +

+ + + + + + +

Table of Contents

+ +
+

1. Introduction

+

+ Tabular data is data that is structured into rows, each of which contains information about some thing. Each row contains the same number of cells (although some of these cells may be empty), which provide values of properties of the thing described by the row. In tabular data, cells within the same column provide values for the same property of the things described by each row. This is what differentiates tabular data from other line-oriented formats. +

+

+ Tabular data is routinely transferred on the web in a textual format called CSV, but the definition of CSV in practice is very loose. Some people use the term to mean any delimited text file. Others stick more closely to the most standard definition of CSV that there is, [RFC4180]. Appendix A describes the various ways in which CSV is defined. This specification refers to such files, as well as tab-delimited files, fixed field formats, spreadsheets, HTML tables, and SQL dumps as tabular data files. +

+

+ In section 4. Tabular Data Models, this document defines a model for tabular data that abstracts away from the varying syntaxes that are used when exchanging tabular data. The model includes annotations, or metadata, about collections of individual tables, rows, columns, and cells. These annotations are typically supplied through separate metadata files; section 5. Locating Metadata defines how these metadata files can be located, while [tabular-metadata] defines what they contain. +

+

+ Once an annotated table has been created, it can be processed in various ways, such as display, validation, or conversion into other formats. This processing is described in section 6. Processing Tables. +

+

+ This specification does not normatively define a format for exchanging tabular data. However, it does provide some best practice guidelines for publishing tabular data as CSV, in section 7. Best Practice CSV, and for parsing both this syntax and those similar to it, in section 8. Parsing Tabular Data. +

+
+

2. Conformance

+

+ As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, + and notes in this specification are non-normative. Everything else in this specification is + normative. +

+

The key words MAY, MUST, MUST NOT, SHOULD, and SHOULD NOT are + to be interpreted as described in [RFC2119]. +

+ +

This specification makes use of the compact IRI Syntax; please refer to the Compact IRIs from [JSON-LD].

+ +

This specification makes use of the following namespaces:

+
+
csvw:
+
http://www.w3.org/ns/csvw#
+
dc:
+
http://purl.org/dc/terms/
+
rdf:
+
http://www.w3.org/1999/02/22-rdf-syntax-ns#
+
rdfs:
+
http://www.w3.org/2000/01/rdf-schema#
+
schema:
+
http://schema.org/
+
xsd:
+
http://www.w3.org/2001/XMLSchema#
+
+ +
+
+

3. Typographical conventions

+

The following typographic conventions are used in this specification:

+ +
+
markup
+
Markup (elements, attributes, properties), machine processable values (string, characters, media types), property name, or a file name is in red-orange monospace font.
+
variable
+
A variable in pseudo-code or in an algorithm description is in italics.
+
definition
+
A definition of a term, to be used elsewhere in this or other specifications, is in bold and italics.
+
definition reference
+
A reference to a definition in this document is underlined and is also an active link to the definition itself.
+
markup definition reference
+
A reference to a definition in this document, when the reference itself is also a markup, is underlined, red-orange monospace font, and is also an active link to the definition itself.
+
external definition reference
+
A reference to a definition in another document is underlined, in italics, and is also an active link to the definition itself.
+
markup external definition reference
+
A reference to a definition in another document, when the reference itself is also a markup, is underlined, in italics red-orange monospace font, and is also an active link to the definition itself.
+
hyperlink
+
A hyperlink is underlined and in blue.
+
[reference]
+
A document reference (normative or informative) is enclosed in square brackets and links to the references section.
+
+ +
Note

Notes are in light green boxes with a green left border and with a "Note" header in green. Notes are normative or informative depending on whether they are in a normative or informative section, respectively.

+ +
Example 1
Examples are in light khaki boxes, with khaki left border, and with a 
+numbered "Example" header in khaki. Examples are always informative. 
+The content of the example is in monospace font and may be syntax colored.
+
+
+
+

4. Tabular Data Models

+

+ This section defines an annotated tabular data model: a model for tables that are annotated with metadata. Annotations provide information about the cells, rows, columns, tables, and groups of tables with which they are associated. The values of these annotations may be lists, structured objects, or atomic values. Core annotations are those that affect the behavior of processors defined in this specification, but other annotations may also be present on any of the components of the model. +

+

+ Annotations may be described directly in [tabular-metadata], be embedded in a tabular data file, or created during the process of generating an annotated table. +

+

+ String values within the tabular data model (such as column titles or cell string values) MUST contain only Unicode characters. +

+
Note

+ In this document, the term annotation refers to any metadata associated with an object in the annotated tabular data model. These are not necessarily web annotations in the sense of [annotation-model]. +

+
+

4.1 Table groups

+

+ A group of tables comprises a set of annotated tables and a set of annotations that relate to that group of tables. The core annotations of a group of tables are: +

+
    +
  • id — an identifier for this group of tables, or null if this is undefined.
  • +
  • notes — any number of additional annotations on the group of tables. This annotation may be empty.
  • +
  • tables — the list of tables in the group of tables. A group of tables MUST have one or more tables.
  • +
+

+ Groups of tables MAY in addition have any number of annotations which provide information about the group of tables. Annotations on a group of tables may include: +

+
    +
  • titles or descriptions of the group of tables.
  • +
  • information about the source or provenance of the group of tables.
  • +
  • links to other groups of tables (e.g. to those that provide similar data from a different time period).
  • +
+ +

When originating from [tabular-metadata], these annotations arise from common properties defined on table group descriptions within metadata documents.

+ +
+
+

4.2 Tables

+

+ An annotated table is a table that is annotated with additional metadata. The core annotations of a table are: +

+
    +
  • columns — the list of columns in the table. A table MUST have one or more columns and the order of the columns within the list is significant and MUST be preserved by applications.
  • +
  • table direction — the direction in which the columns in the table should be displayed, as described in section 6.5.1 Bidirectional Tables; the value of this annotation may also become the value of the text direction annotation on columns and cells within the table, if the textDirection property is set to inherit (the default).
  • +
  • foreign keys — a list of foreign keys on the table, as defined in [tabular-metadata], which may be an empty list.
  • +
  • id — an identifier for this table, or null if this is undefined.
  • +
  • notes — any number of additional annotations on the table. This annotation may be empty.
  • +
  • rows — the list of rows in the table. A table MUST have one or more rows and the order of the rows within the list is significant and MUST be preserved by applications.
  • +
  • schema — a URL referencing a schema applied to this table, or null.
  • +
  • suppress output — a boolean that indicates whether or not this table should be suppressed in any output generated from converting the group of tables, that this table belongs to, into another format, as described in section 6.7 Converting Tables.
  • +
  • transformations — a (possibly empty) list of specifications for converting this table into other formats, as defined in [tabular-metadata].
  • +
  • url — the URL of the source of the data in the table, or null if this is undefined.
  • +
+

+ The table MAY in addition have any number of other annotations. Annotations on a table may include: +

+
    +
  • titles or descriptions of the table,
  • +
  • information about the source or provenance of the data in the table, or
  • +
  • links to other tables (e.g. to indicate tables that include related information).
  • +
+

When originating from [tabular-metadata], these annotations arise from common properties defined on table descriptions within metadata documents.

+
+
+

4.3 Columns

+

+ A column represents a vertical arrangement of cells within a table. The core annotations of a column are: +

+
    +
  • about URL — the about URL URI template used to create a URL identifier for each value of cell in this column relative to the row in which it is contained, as defined in [tabular-metadata].
  • +
  • cells — the list of cells in the column. A column MUST contain one cell from each row in the table. The order of the cells in the list MUST match the order of the rows in which they appear within the rows for the associated table.
  • +
  • datatype — the expected datatype for the values of cells in this column, as defined in [tabular-metadata].
  • +
  • default — the default value for cells whose string value is an empty string.
  • +
  • lang — the code for the expected language for the values of cells in this column, expressed in the format defined by [BCP47].
  • +
  • name — the name of the column.
  • +
  • null — the string or strings which cause the value of cells having string value matching any of these values to be null.
  • +
  • number — the position of the column amongst the columns for the associated table, starting from 1.
  • +
  • ordered — a boolean that indicates whether the order of values of a cell should be preserved or not.
  • +
  • property URL — the expected property URL URI template used to create a URL identifier for the property of each value of cell in this column relative to the row in which it is contained, as defined in [tabular-metadata].
  • +
  • required — a boolean that indicates that values of cells in this column MUST NOT be empty.
  • +
  • separator — a string value used to create multiple values of cells in this column by splitting the string value on the separator.
  • +
  • source number — the position of the column in the file at the url of the table, starting from 1, or null.
  • +
  • suppress output — a boolean that indicates whether or not this column should be suppressed in any output generated from converting the table, as described in section 6.7 Converting Tables.
  • +
  • table — the table in which the column appears.
  • +
  • text direction — the indicator of the text direction values of cells in this column, as described in section 6.5.1 Bidirectional Tables; the value of this annotation may be derived from the table direction annotation on the table, if the textDirection property is set to inherit (the default).
  • +
  • titles — any number of human-readable titles for the column, each of which MAY have an associated language code as defined by [BCP47].
  • +
  • value URL — the expected value URL URI template used to create the URL identifier for the value of each cell in this column, as defined in [tabular-metadata].
  • +
  • virtual — a boolean that indicates whether the column is a virtual column. Virtual columns are used to extend the source data with additional empty columns to support more advanced conversions; when this annotation is false, the column is a real column, which exists in the source data for the table.
  • +
+
Note

+ Several of these annotations arise from inherited properties that may be defined within metadata on table group, table or individual column descriptions. +

+

+ Columns MAY in addition have any number of other annotations, such as a description. When originating from [tabular-metadata], these annotations arise from common properties defined on column descriptions within metadata documents.

+
+
+

4.4 Rows

+

+ A row represents a horizontal arrangement of cells within a table. The core annotations of a row are: +

+
    +
  • cells — the list of cells in the row. A row MUST contain one cell from each column in the table. The order of the cells in the list MUST match the order of the columns in which they appear within the table columns for the row's table.
  • +
  • number — the position of the row amongst the rows for the table, starting from 1.
  • +
  • primary key — a possibly empty list of cells whose values together provide a unique identifier for this row. This is similar to the name of a column.
  • +
  • titles — any number of human-readable titles for the row, each of which MAY have an associated language code as defined by [BCP47].
  • +
  • referenced rows — a possibly empty list of pairs of a foreign key and a row in a table within the same group of tables (which may be another row in the table in which this row appears).
  • +
  • source number — the position of the row in the original url of the table, starting from 1, or null.
  • +
  • table — the table in which the row appears.
  • +
+

+ Rows MAY have any number of additional annotations. The annotations on a row provide additional metadata about the information held in the row, such as: +

+
    +
  • the certainty of the information in that row.
  • +
  • information about the source or provenance of the data in that row.
  • +
+

+ Neither this specification nor [tabular-metadata] defines a method to specify such annotations. Implementations MAY define a method for adding annotations to rows by interpreting notes on the table. +

+
+
+

4.5 Cells

+

+ A cell represents a cell at the intersection of a row and a column within a table. The core annotations of a cell are: +

+
    +
  • about URL — an absolute URL for the entity about which this cell provides information, or null.
  • +
  • column — the column in which the cell appears; the cell MUST be in the cells for that column.
  • +
  • errors — a (possibly empty) list of validation errors generated while parsing the value of the cell.
  • +
  • ordered — a boolean that, if the value of this cell is a list, indicates whether the order of that list should be preserved or not.
  • +
  • property URL — an absolute URL for the property associated with this cell, or null.
  • +
  • row — the row in which the cell appears; the cell MUST be in the cells for that row.
  • +
  • string value — a string that is the original syntactic representation of the value of the cell, e.g. how the cell appears within a CSV file; this may be an empty string.
  • +
  • table — the table in which the cell appears.
  • +
  • text direction — which direction the text within the cell should be displayed, as described in section 6.5.1 Bidirectional Tables; the value of this annotation may be derived from the table direction annotation on the table, if the textDirection property is set to inherit (the default).
  • +
  • value — the semantic value of the cell; this MAY be a list of values, each of which MAY have a datatype other than a string, MAY have a language and MAY be null. For example, annotations might enable a processor to understand the string value of the cell as representing a number or a date. By default, if the string value is an empty string, the semantic value of the cell is null.
  • +
  • value URL — an absolute URL for this cell's value, or null.
  • +
+
Note

+ The presence or absence of quotes around a value within a CSV file is a syntactic detail that is not reflected in the tabular data model. In other words, there is no distinction in the model between the second value in a,,z and the second value in a,"",z. +

+
Note

+ Several of these annotations arise from or are constructed based on inherited properties that may be defined within metadata on table group, table or column descriptions. +

+

+ Cells MAY have any number of additional annotations. The annotations on a cell provide metadata about the value held in the cell, particularly when this overrides the information provided for the column and row that the cell falls within. Annotations on a cell might be: +

+
    +
  • notes to aid the interpretation of the value.
  • +
  • information about the source or provenance of the data in that cell.
  • +
  • indication of the units of measure used within a cell.
  • +
+

+ Neither this specification nor [tabular-metadata] defines a method to specify such annotations. Implementations MAY define a method for adding annotations to cells by interpreting notes on the table. +

+
Note

+ Units of measure are not a built-in part of the tabular data model. However, they can be captured through notes or included in the converted output of tabular data through defining datatypes with identifiers that indicate the unit of measure, using virtual columns to create nested data structures, or using common properties to specify Data Cube attributes as defined in [vocab-data-cube]. +

+
+
+

4.6 Datatypes

+

+ Columns and cell values within tables may be annotated with a datatype which indicates the type of the values obtained by parsing the string value of the cell. +

+

+ Datatypes are based on a subset of those defined in [xmlschema11-2]. The annotated tabular data model limits cell values to have datatypes as shown on the diagram: +

+
    +
  • the datatypes defined in [xmlschema11-2] as derived from and including xsd:anyAtomicType.
  • +
  • the datatype rdf:XMLLiteral, a sub-type of xsd:string, which indicates the value is an XML fragment.
  • +
  • the datatype rdf:HTML, a sub-type of xsd:string, which indicates the value is an HTML fragment.
  • +
  • the datatype csvw:JSON, a sub-type of xsd:string, which indicates the value is serialized JSON.
  • +
  • datatypes derived from any of these datatypes.
  • +
+
+ Built-in Datatype Hierarchy diagram +
Fig. 1 Diagram showing the built-in datatypes, based on [xmlschema11-2]; names in parentheses denote aliases to the [xmlschema11-2] terms (see the diagram in SVG or PNG formats)
+
+

The core annotations of a datatype are:

+ +

+ If the id of a datatype is that of a built-in datatype, the values of the other core annotations listed above MUST be consistent with the values defined in [xmlschema11-2] or above. For example, if the id is xsd:integer then the base must be xsd:decimal. +

+

+ Datatypes MAY have any number of additional annotations. The annotations on a datatype provide metadata about the datatype such as title or description. These arise from common properties defined on datatype descriptions within metadata documents, as defined in [tabular-metadata]. +

+
Note

+ The id annotation may reference an XSD, OWL or other datatype definition, which is not used by this specification for validating column values, but may be useful for further processing. +

+
+

4.6.1 Length Constraints

+

+ The length, minimum length and maximum length annotations indicate the exact, minimum and maximum lengths for cell values. +

+

+ The length of a value is determined as defined in [xmlschema11-2], namely as follows: +

+
    +
  • if the value is null, its length is zero.
  • +
  • if the value is a string or one of its subtypes, its length is the number of characters (ie [UNICODE] code points) in the value.
  • +
  • if the value is of a binary type, its length is the number of bytes in the binary value.
  • +
+

If the value is a list, the constraint applies to each element of the list.

+
+
+

4.6.2 Value Constraints

+

+ The minimum, maximum, minimum exclusive, and maximum exclusive annotations indicate limits on cell values. These apply to numeric, date/time, and duration types. +

+

+ Validation of cell values against these datatypes is as defined in [xmlschema11-2]. If the value is a list, the constraint applies to each element of the list. +

+
+
+
+
+

5. Locating Metadata

+

+ As described in section 4. Tabular Data Models, tabular data may have a number of annotations associated with it. Here we describe the different methods that can be used to locate metadata that provides those annotations. +

+

+ In the methods of locating metadata described here, metadata is provided within a single document. The syntax of such documents is defined in [tabular-metadata]. Metadata is located using a specific order of precedence: +

+
    +
  1. metadata supplied by the user of the implementation that is processing the tabular data, see section 5.1 Overriding Metadata.
  2. +
  3. metadata in a document linked to using a Link header associated with the tabular data file, see section 5.2 Link Header.
  4. +
  5. metadata located through default paths which may be overridden by a site-wide location configuration, see section 5.3 Default Locations and Site-wide Location Configuration.
  6. +
  7. metadata embedded within the tabular data file itself, see section 5.4 Embedded Metadata.
  8. +
+

+ Processors MUST use the first metadata found for processing a tabular data file by using overriding metadata, if provided. Otherwise processors MUST attempt to locate the first metadata document from the Link header or the metadata located through site-wide configuration. If no metadata is supplied or found, processors MUST use embedded metadata. If the metadata does not originate from the embedded metadata, validators MUST verify that the table group description within that metadata is compatible with that in the embedded metadata, as defined in [tabular-metadata]. +

+
Note

+ When feasible, processors should start from a metadata file and publishers should link to metadata files directly, rather than depend on mechanisms outlined in this section for locating metadata from a tabular data file. Otherwise, if possible, publishers should provide a Link header on the tabular data file as described in section 5.2 Link Header. +

+
Note

+ If there is no site-wide location configuration, section 5.3 Default Locations and Site-wide Location Configuration specifies default URI patterns or paths to be used to locate metadata.

+

+
+

5.1 Overriding Metadata

+

+ Processors SHOULD provide users with the facility to provide their own metadata for tabular data files that they process. This might be provided: +

+
    +
  • through processor options, such as command-line options for a command-line implementation or checkboxes in a GUI.
  • +
  • by enabling the user to select an existing metadata file, which may be local or remote.
  • +
  • by enabling the user to specify a series of metadata files, which are merged by the processor and handled as if they were a single file.
  • +
+

+ For example, a processor might be invoked with: +

+
Example 2: Command-line CSV processing with column types
$ csvlint data.csv --datatypes:string,float,string,string
+

+ to enable the testing of the types of values in the columns of a CSV file, or with: +

+
Example 3: Command-line CSV processing with a schema
$ csvlint data.csv --schema:schema.json
+

+ to supply a schema that describes the contents of the file, against which it can be validated. +

+

+ Metadata supplied in this way is called overriding, or user-supplied, metadata. Implementations SHOULD define how any options they define are mapped into the vocabulary defined in [tabular-metadata]. If the user selects existing metadata files, implementations MUST NOT use metadata located through the Link header (as described in section 5.2 Link Header) or site-wide location configuration (as described in section 5.3 Default Locations and Site-wide Location Configuration). +

+
Note

+ Users should ensure that any metadata from those locations that they wish to use is explicitly incorporated into the overriding metadata that they use to process tabular data. Processors may provide facilities to make this easier by automatically merging metadata files from different locations, but this specification does not define how such merging is carried out. +

+
+ +
+

5.3 Default Locations and Site-wide Location Configuration

+

+ If the user has not supplied a metadata file as overriding metadata, described in section 5.1 Overriding Metadata, and no applicable metadata file has been discovered through a Link header, described in section 5.2 Link Header, processors MUST attempt to locate a metadata document through site-wide configuration. +

+

+ In this case, processors MUST retrieve the file from the well-known URI /.well-known/csvm. (Well-known URIs are defined by [RFC5785].) If no such file is located (i.e. the response results in a client error 4xx status code or a server error 5xx status code), processors MUST proceed as if this file were found with the following content which defines default locations: +

+
{+url}-metadata.json
+csv-metadata.json
+        
+

+ The response to retrieving /.well-known/csvm MAY be cached, subject to cache control directives. This includes caching an unsuccessful response such as a 404 Not Found. +

+

+ This file MUST contain a URI template, as defined by [URI-TEMPLATE], on each line. Starting with the first such URI template, processors MUST: +

+
    +
  1. Expand the URI template, with the variable url being set to the URL of the requested tabular data file (with any fragment component of that URL removed).
  2. +
  3. Resolve the resulting URL against the URL of the requested tabular data file.
  4. +
  5. Attempt to retrieve a metadata document at that URL.
  6. +
  7. If no metadata document is found at that location, or if the metadata file found at the location does not explicitly include a reference to the relevant tabular data file, perform these same steps on the next URI template, otherwise use that metadata document.
  8. +
+

+ For example, if the tabular data file is at http://example.org/south-west/devon.csv then processors must attempt to locate a well-known file at http://example.org/.well-known/csvm. If that file contains: +

+
Example 5
{+url}.json
+csvm.json
+/csvm?file={url}
+

+ the processor will first look for http://example.org/south-west/devon.csv.json. If there is no metadata file in that location, it will then look for http://example.org/south-west/csvm.json. Finally, if that also fails, it will look for http://example.org/csvm?file=http%3A%2F%2Fexample.org%2Fsouth-west%2Fdevon.csv. +

+

+ If no file were found at http://example.org/.well-known/csvm, the processor will use the default locations and try to retrieve metadata from http://example.org/south-west/devon.csv-metadata.json and, if unsuccessful, http://example.org/south-west/csv-metadata.json. +

+
+
+

5.4 Embedded Metadata

+

+ Most syntaxes for tabular data provide a facility for embedding metadata within the tabular data file itself. The definition of a syntax for tabular data SHOULD include a description of how the syntax maps to an annotated data model, and in particular how any embedded metadata is mapped into the vocabulary defined in [tabular-metadata]. Parsing based on the default dialect for CSV, as described in 8. Parsing Tabular Data, will extract column titles from the first row of a CSV file. +

+
Example 6: http://example.org/tree-ops.csv
GID,On Street,Species,Trim Cycle,Inventory Date
+1,ADDISON AV,Celtis australis,Large Tree Routine Prune,10/18/2010
+2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010
+

The results of this can be found in section 8.2.1 Simple Example.

+

For another example, the following tab-delimited file contains embedded metadata where it is assumed that comments may be added using a #, and that the column types may be indicated using a #datatype annotation: +

+
Example 7: Tab-separated file containing embedded metadata
# publisher City of Palo Alto
+# updated 12/31/2010
+#name GID on_street species trim_cycle  inventory_date
+#datatype string  string  string  string  date:M/D/YYYY
+  GID On Street Species Trim Cycle  Inventory Date
+  1 ADDISON AV  Celtis australis  Large Tree Routine Prune  10/18/2010
+  2 EMERSON ST  Liquidambar styraciflua Large Tree Routine Prune  6/2/2010
+

+ A processor that recognises this format may be able to extract and make sense of this embedded metadata. +

+
+
+
+

6. Processing Tables

+

+ This section describes how particular types of applications should process tabular data and metadata files. +

+

+ In many cases, an application will start processing from a metadata file. In that case, the initial metadata file is treated as overriding metadata and the application MUST NOT continue to retrieve other available metadata about each of the tabular data files referenced by that initial metadata file other than embedded metadata. +

+

+ In other cases, applications will start from a tabular data file, such as a CSV file, and locate metadata from that file. This metadata will be used to process the file as if the processor were starting from that metadata file. +

+

+ For example, if a validator is passed a locally authored metadata file spending.json, which contains: +

+
Example 8: Metadata file referencing multiple tabular data files sharing a schema
{
+  "tableSchema": "government-spending.csv",
+  "tables": [{
+    "url": "http://example.org/east-sussex-2015-03.csv"
+  }, {
+    "url": "http://example.org/east-sussex-2015-02.csv"
+  }, ...
+  ]
+}
+

+ the validator would validate all the listed tables, using the locally defined schema at government-spending.csv. It would also use the metadata embedded in the referenced CSV files; for example, when processing http://example.org/east-sussex-2015-03.csv, it would use embedded metadata within that file to verify that the CSV is compatible with the metadata. +

+

+ If a validator is passed a tabular data file http://example.org/east-sussex-2015-03.csv, the validator would use the metadata located from the CSV file: the first metadata file found through the Link headers found when retrieving that file, or located through a site-wide location configuration. +

+
Note

Starting with a metadata file can remove the need to perform additional requests to locate linked metadata, or metadata retrieved through site-wide location configuration.

+
+

6.1 Creating Annotated Tables

+

After locating metadata, metadata is normalized and coerced into a single table group description. When starting with a metadata file, this involves normalizing the provided metadata file and verifying that the embedded metadata for each tabular data file referenced from the metadata is compatible with the metadata. When starting with a tabular data file, this involves locating the first metadata file as described in section 5. Locating Metadata and normalizing into a single descriptor. +

+

If processing starts with a tabular data file, implementations:

+
    +
  1. Retrieve the tabular data file.
  2. +
  3. Retrieve the first metadata file (FM) as described in section 5. Locating Metadata: +
      +
    1. metadata supplied by the user (see section 5.1 Overriding Metadata).
    2. +
    3. metadata referenced from a Link Header that may be returned when retrieving the tabular data file (see section 5.2 Link Header).
    4. +
    5. metadata retrieved through a site-wide location configuration (see section 5.3 Default Locations and Site-wide Location Configuration).
    6. +
    7. embedded metadata as defined in section 5.4 Embedded Metadata with a single tables entry where the url property is set from that of the tabular data file.
    8. +
    +
  4. +
  5. Proceed as if the process starts with FM.
  6. +
+

If the process starts with a metadata file:

+
    +
  1. Retrieve the metadata file yielding the metadata UM (which is treated as overriding metadata, see section 5.1 Overriding Metadata).
  2. +
  3. Normalize UM using the process defined in Normalization in [tabular-metadata], coercing UM into a table group description, if necessary.
  4. +
  5. For each table (TM) in UM in order, create one or more annotated tables: +
      +
    1. Extract the dialect description (DD) from UM for the table associated with the tabular data file. If there is no such dialect description, extract the first available dialect description from a group of tables in which the tabular data file is described. Otherwise use the default dialect description.
    2. +
    3. If using the default dialect description, override default values in DD based on HTTP headers found when retrieving the tabular data file: +
        +
      • If the media type from the Content-Type header is text/tab-separated-values, set delimiter to TAB in DD.
      • +
      • If the Content-Type header includes the header parameter with a value of absent, set header to false in DD.
      • +
      • If the Content-Type header includes the charset parameter, set encoding to this value in DD.
      • +
      +
    4. +
    5. +

      Parse the tabular data file, using DD as a guide, to create a basic tabular data model (T) and extract embedded metadata (EM), for example from the header line.

      +
      Note

      This specification provides a non-normative definition for parsing CSV-based files, including the extraction of embedded metadata, in section 8. Parsing Tabular Data. This specification does not define any syntax for embedded metadata beyond this; whatever syntax is used, it's assumed that metadata can be mapped to the vocabulary defined in [tabular-metadata].

      +
    6. +
    7. If a Content-Language HTTP header was found when retrieving the tabular data file, and the value provides a single language, set the lang inherited property to this value in TM, unless TM already has a lang inherited property.
    8. +
    9. Verify that TM is compatible with EM using the procedure defined in Table Description Compatibility in [tabular-metadata]; if TM is not compatible with EM validators MUST raise an error, other processors MUST generate a warning and continue processing.
    10. +
    11. Use the metadata TM to add annotations to the tabular data model T as described in Section 2 Annotating Tables in [tabular-metadata].
    12. +
    +
  6. +
+
+
+

6.2 Metadata Compatibility

+

When processing a tabular data file using metadata as discovered using section 5. Locating Metadata, processors MUST ensure that the metadata and tabular data file are compatible; this is typically done by extracting embedded metadata from the tabular data file and determining that the provided or discovered metadata is compatible with the embedded metadata using the procedure defined in Table Compatibility in [tabular-metadata].

+
+
+

6.3 URL Normalization

+

Metadata Discovery and Compatibility involve comparing URLs. When comparing URLs, processors MUST use Syntax-Based Normalization as defined in [RFC3968]. Processors MUST perform Scheme-Based Normalization for HTTP (80) and HTTPS (443) and SHOULD perform Scheme-Based Normalization for other well-known schemes.

+
+
+

6.4 Parsing Cells

+

+ Unlike many other data formats, tabular data is designed to be read by humans. For that reason, it's common for data to be represented within tabular data in a human-readable way. The + datatype, + default, + lang, + null, + required, and + separator annotations provide the information needed to parse the string value of a cell into its (semantic) value annotation. This is used: +

+
    +
  • by validators to check that the data in the table is in the expected format,
  • +
  • by converters to parse the values before mapping them into values in the target of the conversion,
  • +
  • when displaying data, to map it into formats that are meaningful for those viewing the data (as opposed to those publishing it), and
  • +
  • when inputting data, to turn entered values into representations in a consistent format.
  • +
+

The process of parsing a cell creates a cell with annotations based on the original string value, parsed value and other column annotations and adds the cell to the list of cells in a row and cells in a column:

+ +

+ After parsing, the cell value can be: +

+
    +
  • null,
  • +
  • a single value with an associated optional datatype or language, or
  • +
  • a sequence of such values.
  • +
+

+ The process of parsing the string value into a single value or a list of values is as follows: +

+
    +
  1. unless the datatype base is string, json, xml, html or anyAtomicType, replace all carriage return (#xD), line feed (#xA), and tab (#x9) characters with space characters.
  2. +
  3. unless the datatype base is string, json, xml, html, anyAtomicType, or normalizedString, strip leading and trailing whitespace from the string value and replace all instances of two or more whitespace characters with a single space character.
  4. +
  5. if the normalized string is an empty string, apply the remaining steps to the string given by the column default annotation.
  6. +
  7. if the column separator annotation is not null and the normalized string is an empty string, the cell value is an empty list. If the column required annotation is true, add an error to the list of errors for the cell.
  8. +
  9. if the column separator annotation is not null, the cell value is a list of values; set the list annotation on the cell to true, and create the cell value by: +
      +
    1. if the normalized string is the same as any one of the values of the column null annotation, then the resulting value is null.
    2. +
    3. split the normalized string at the character specified by the column separator annotation.
    4. +
    5. unless the datatype base is string or anyAtomicType, strip leading and trailing whitespace from these strings.
    6. +
    7. apply the remaining steps to each of the strings in turn.
    8. +
    +
  10. +
  11. if the string is an empty string, apply the remaining steps to the string given by the column default annotation.
  12. +
  13. if the string is the same as any one of the values of the column null annotation, then the resulting value is null. If the column separator annotation is null and the column required annotation is true, add an error to the list of errors for the cell.
  14. +
  15. parse the string using the datatype format if one is specified, as described below to give a value with an associated datatype. If the datatype base is string, or there is no datatype, the value has an associated language from the column lang annotation. If there are any errors, add them to the list of errors for the cell; in this case the value has a datatype of string; if the datatype base is string, or there is no datatype, the value has an associated language from the column lang annotation.
  16. +
  17. validate the value based on the length constraints described in section 4.6.1 Length Constraints, the value constraints described in section 4.6.2 Value Constraints and the datatype format annotation if one is specified, as described below. If there are any errors, add them to the list of errors for the cell.
  18. +
+

The final value (or values) become the value annotation on the cell.

+

If there is an about URL annotation on the column, it becomes the about URL annotation on the cell, after being transformed into an absolute URL as described in URI Template Properties of [tabular-metadata].

+

If there is a property URL annotation on the column, it becomes the property URL annotation on the cell, after being transformed into an absolute URL as described in URI Template Properties of [tabular-metadata].

+

If there is a value URL annotation on the column, it becomes the value URL annotation on the cell, after being transformed into an absolute URL as described in URI Template Properties of [tabular-metadata]. The value URL annotation is null if the cell value is null and the column virtual annotation is false.

+
+

6.4.1 Parsing examples

This section is non-normative.

+

+ When no datatype annotation is available, the value of a cell is the same as its string value. For example, a cell with a string value of "99" would similarly have the (semantic) value "99". +

+

+ If a datatype base is provided for the cell, that is used to create a (semantic) value for the cell. For example, if the metadata contains: +

+
Example 9
"datatype": "integer"
+

+ for the cell with the string value "99" then the value of that cell will be the integer 99. A cell whose string value was not a valid integer (such as "one" or "1.0") would be assigned that string value as its (semantic) value annotation, but also have a validation error listed in its errors annotation. +

+

+ Sometimes data uses special codes to indicate unknown or null values. For example, a particular column might contain a number that is expected to be between 1 and 10, with the string 99 used in the original tabular data file to indicate a null value. The metadata for such a column would include: +

+
Example 10
"datatype": {
+  "base": "integer",
+  "minimum": 1,
+  "maximum": 10
+},
+"null": "99"
+

+ In this case, a cell with a string value of "5" would have the (semantic) value of the integer 5; a cell with a string value of "99" would have the value null. +

+

+ Similarly, a cell may be assigned a default value if the string value for the cell is empty. Consider a configuration such as: +

+
Example 11
"datatype": {
+  "base": "integer",
+  "minimum": 1,
+  "maximum": 10
+},
+"default": "5"
+

+ In this case, a cell whose string value is "" would be assigned the value of the integer 5. A cell whose string value contains whitespace, such as a single tab character, would also be assigned the value of the integer 5: when the datatype is something other than string or anyAtomicType, leading and trailing whitespace is stripped from string values before the remainder of the processing is carried out. +

+

+ Cells can contain sequences of values. For example, a cell might have the string value "1 5 7.0". In this case, the separator is a space character. The appropriate configuration would be: +

+
Example 12
"datatype": {
+  "base": "integer",
+  "minimum": 1,
+  "maximum": 10
+},
+"default": "5",
+"separator": " "
+

+ and this would mean that the cell's value would be an array containing two integers and a string: [1, 5, "7.0"]. The final value of the array is a string because it is not a valid integer; the cell's errors annotation will also contain a validation error. +

+

+ Also, with this configuration, if the string value of the cell were "" (i.e. it was an empty cell) the value of the cell would be an empty list. +

+

+ A cell value can be inserted into a URL created using a URI template property such as valueUrl. For example, if a cell with the string value "1 5 7.0" were in a column named values, defined with: +

+
Example 13
"datatype": "decimal",
+"separator": " ",
+"valueUrl": "{?values}"
+

+ then after expansion of the URI template, the resulting valueUrl would be ?values=1.0,5.0,7.0. The canonical representations of the decimal values are used within the URL. +

+
+
+

6.4.2 Formats for numeric types

+

+ By default, numeric values must be in the formats defined in [xmlschema11-2]. It is not uncommon for numbers within tabular data to be formatted for human consumption, which may involve using commas for decimal points, grouping digits in the number using commas, or adding percent signs to the number. +

+

+ If the datatype base is a numeric type, the datatype format annotation indicates the expected format for that number. Its value MUST be either a single string or an object with one or more of the properties: +

+
+
decimalChar
+
A string whose value is used to represent a decimal point within the number. The default value is ".". If the supplied value is not a string, implementations MUST issue a warning and proceed as if the property had not been specified.
+
groupChar
+
A string whose value is used to group digits within the number. The default value is null. If the supplied value is not a string, implementations MUST issue a warning and proceed as if the property had not been specified.
+
pattern
+
A number format pattern as defined in [UAX35]. Implementations MUST recognise number format patterns containing the symbols 0, #, the specified decimalChar (or "." if unspecified), the specified groupChar (or "," if unspecified), E, +, % and ‰. Implementations MAY additionally recognise number format patterns containing other special pattern characters defined in [UAX35]. If the supplied value is not a string, or if it contains an invalid number format pattern or uses special pattern characters that the implementation does not recognise, implementations MUST issue a warning and proceed as if the property had not been specified.
+
+

+ If the datatype format annotation is a single string, this is interpreted in the same way as if it were an object with a pattern property whose value is that string. +

+

+ If the groupChar is specified, but no pattern is supplied, when parsing the string value of a cell against this format specification, implementations MUST recognise and parse numbers that consist of: +

+
    +
  1. an optional + or - sign,
  2. +
  3. followed by a decimal digit (0-9),
  4. +
  5. followed by any number of decimal digits (0-9) and the string specified as the groupChar,
  6. +
  7. followed by an optional decimalChar followed by one or more decimal digits (0-9),
  8. +
  9. followed by an optional exponent, consisting of an E followed by an optional + or - sign followed by one or more decimal digits (0-9), or
  10. +
  11. followed by an optional percent (%) or per-mille (‰) sign.
  12. +
+

+ or that are one of the special values: +

+
    +
  1. NaN,
  2. +
  3. INF, or
  4. +
  5. -INF.
  6. +
+

+ Implementations MAY also recognise numeric values that are in any of the standard-decimal, standard-percent or standard-scientific formats listed in the Unicode Common Locale Data Repository. +

+

+ Implementations MUST add a validation error to the errors annotation for the cell, and set the cell value to a string rather than a number if the string being parsed: +

+
    +
  • is not in the format specified in the pattern, if one is defined
  • +
  • otherwise, if the string +
      +
    • does not meet the numeric format defined above,
    • +
    • contains two consecutive groupChar strings,
    • +
    +
  • +
  • contains the decimalChar, if the datatype base is integer or one of its sub-types,
  • +
  • contains an exponent, if the datatype base is decimal or one of its sub-types, or
  • +
  • is one of the special values NaN, INF, or -INF, if the datatype base is decimal or one of its sub-types.
  • +
+

+ Implementations MUST use the sign, exponent, percent, and per-mille signs when parsing the string value of a cell to provide the value of the cell. For example, the string value "-25%" must be interpreted as -0.25 and the string value "1E6" as 1000000. +

+
+
+

6.4.3 Formats for booleans

+

+ Boolean values may be represented in many ways aside from the standard 1 and 0 or true and false. +

+

+ If the datatype base for a cell is boolean, the datatype format annotation provides the true value followed by the false value, separated by |. For example if format is Y|N then cells must hold either Y or N with Y meaning true and N meaning false. If the format does not follow this syntax, implementations MUST issue a warning and proceed as if no format had been provided. +

+

+ The resulting cell value will be one or more boolean true or false values. +

+
+
+

6.4.4 Formats for dates and times

+

+ By default, dates and times are assumed to be in the format defined in [xmlschema11-2]. However dates and times are commonly represented in tabular data in other formats. +

+

+ If the datatype base is a date or time type, the datatype format annotation indicates the expected format for that date or time. +

+

+ The supported date and time format patterns listed here are expressed in terms of the date field symbols defined in [UAX35]. These formats MUST be recognised by implementations and MUST be interpreted as defined in that specification. Implementations MAY additionally recognise other date format patterns. Implementations MUST issue a warning if the date format pattern is invalid or not recognised and proceed as if no date format pattern had been provided. +

+
Note

+ For interoperability, authors of metadata documents SHOULD use only the formats listed in this section. +

+

+ The following date format patterns MUST be recognized by implementations: +

+
    +
  • yyyy-MM-dd e.g., 2015-03-22
  • +
  • yyyyMMdd e.g., 20150322
  • +
  • dd-MM-yyyy e.g., 22-03-2015
  • +
  • d-M-yyyy e.g., 22-3-2015
  • +
  • MM-dd-yyyy e.g., 03-22-2015
  • +
  • M-d-yyyy e.g., 3-22-2015
  • +
  • dd/MM/yyyy e.g., 22/03/2015
  • +
  • d/M/yyyy e.g., 22/3/2015
  • +
  • MM/dd/yyyy e.g., 03/22/2015
  • +
  • M/d/yyyy e.g., 3/22/2015
  • +
  • dd.MM.yyyy e.g., 22.03.2015
  • +
  • d.M.yyyy e.g., 22.3.2015
  • +
  • MM.dd.yyyy e.g., 03.22.2015
  • +
  • M.d.yyyy e.g., 3.22.2015
  • +
+

+ The following time format patterns MUST be recognized by implementations: +

+
    +
  • HH:mm:ss.S with one or more trailing S characters indicating the maximum number of fractional seconds e.g., HH:mm:ss.SSS for 15:02:37.143
  • +
  • HH:mm:ss e.g., 15:02:37
  • +
  • HHmmss e.g., 150237
  • +
  • HH:mm e.g., 15:02
  • +
  • HHmm e.g., 1502
  • +
+

+ The following date/time format patterns MUST be recognized by implementations: +

+
    +
  • yyyy-MM-ddTHH:mm:ss.S with one or more trailing S characters indicating the maximum number of fractional seconds e.g., yyyy-MM-ddTHH:mm:ss.SSS for 2015-03-15T15:02:37.143
  • +
  • yyyy-MM-ddTHH:mm:ss e.g., 2015-03-15T15:02:37
  • +
  • yyyy-MM-ddTHH:mm e.g., 2015-03-15T15:02
  • +
  • any of the date formats above, followed by a single space, followed by any of the time formats above, e.g., M/d/yyyy HH:mm for 3/22/2015 15:02 or dd.MM.yyyy HH:mm:ss for 22.03.2015 15:02:37
  • +
+

+ Implementations MUST also recognise date, time, and date/time format patterns that end with timezone markers consisting of between one and three x or X characters, possibly after a single space. These MUST be interpreted as follows: +

+
    +
  • X e.g., -08, +0530, or Z (minutes are optional)
  • +
  • XX e.g., -0800, +0530, or Z
  • +
  • XXX e.g., -08:00, +05:30, or Z
  • +
  • x e.g., -08 or +0530 (Z is not permitted)
  • +
  • xx e.g., -0800 or +0530 (Z is not permitted)
  • +
  • xxx e.g., -08:00 or +05:30 (Z is not permitted)
  • +
+

+ For example, date format patterns could include yyyy-MM-ddTHH:mm:ssXXX for 2015-03-15T15:02:37Z or 2015-03-15T15:02:37-05:00, or HH:mm x for 15:02 -05. +

+

+ The cell value will be one or more date/time values extracted using the format. +

+
Note

+ For simplicity, this version of this standard does not support abbreviated or full month or day names, or double digit years. Future versions of this standard may support other date and time formats, or general purpose date/time pattern strings. Authors of schemas SHOULD use appropriate regular expressions, along with the string datatype, for dates and times that use a format other than that specified here. +

+
+
+

6.4.5 Formats for durations

+

+ Durations MUST be formatted and interpreted as defined in [xmlschema11-2], using the [ISO8601] format -?PnYnMnDTnHnMnS. For example, the duration P1Y1D is used for a year and a day; the duration PT2H30M for 2 hours and 30 minutes. +

+

+ If the datatype base is a duration type, the datatype format annotation provides a regular expression for the string values, with syntax and processing defined by [ECMASCRIPT]. If the supplied value is not a valid regular expression, implementations MUST issue a warning and proceed as if no format had been provided. +

+
Note

+ Authors are encouraged to be conservative in the regular expressions that they use, sticking to the basic features of regular expressions that are likely to be supported across implementations. +

+

+ The cell value will be one or more durations extracted using the format. +

+
+
+

6.4.6 Formats for other types

+

+ If the datatype base is not numeric, boolean, a date/time type, or a duration type, the datatype format annotation provides a regular expression for the string values, with syntax and processing defined by [ECMASCRIPT]. If the supplied value is not a valid regular expression, implementations MUST issue a warning and proceed as if no format had been provided. +

+
Note

+ Authors are encouraged to be conservative in the regular expressions that they use, sticking to the basic features of regular expressions that are likely to be supported across implementations. +

+

+ Values that are labelled as html, xml, or json SHOULD NOT be validated against those formats. +

+
Note

+ Metadata creators who wish to check the syntax of HTML, XML, or JSON within tabular data should use the datatype format annotation to specify a regular expression against which such values will be tested. +

+
+
+
+

6.5 Presenting Tables

This section is non-normative.

+

+ When presenting tables, implementations should: +

+
    +
  • use the table direction annotation on each table, and the text direction annotation on each cell, to determine the ordering of columns and characters within cells, as described in section 6.5.1 Bidirectional Tables
  • +
  • use the titles annotation on each column to provide a header for the column, selecting the first title in a language based on the user's locale and preferences, as described in section 6.5.2 Column and row labelling
  • +
  • add links to headers based on the property URLs of the cells in the first row of the table
  • +
  • present cell values, particularly boolean, numeric and date/time values, in a lexical form based on the user's locale and preferences
  • +
  • add links to the presentation of rows and cells based on the about URL and value URL annotations on cells
  • +
  • highlight or otherwise indicate cells with errors
  • +
  • provide a way of viewing non-core annotations on table groups, tables, columns, rows and cells
  • +
  • provide links to download the raw tabular data file that is being displayed
  • +
+
+

6.5.1 Bidirectional Tables

+

+ There are two levels of bidirectionality to consider when displaying tables: the directionality of the table (i.e., whether the columns should be arranged left-to-right or right-to-left) and the directionality of the content of individual cells. +

+

+ The table direction annotation on the table provides information about the desired display of the columns in the table. If table direction is ltr then the first column should be displayed on the left and the last column on the right. If table direction is rtl then the first column should be displayed on the right and the last column on the left. +

+

+ If table direction is auto then tables should be displayed with attention to the bidirectionality of the content of the cells in the table. Specifically, the values of the cells in the table should be scanned breadth first: from the first cell in the first column through to the last cell in the first row, down to the last cell in the last column. If the first character in the table with a strong type as defined in [BIDI] indicates a RTL directionality, the table should be displayed with the first column on the right and the last column on the left. Otherwise, the table should be displayed with the first column on the left and the last column on the right. Characters such as whitespace, quotes, commas, and numbers do not have a strong type, and therefore are skipped when identifying the character that determines the directionality of the table. +

+

+ Implementations should enable user preferences to override the indicated metadata about the directionality of the table. +

+

+ Once the directionality of the table has been determined, each cell within the table should be considered as a separate paragraph, as defined by the Unicode Bidirectional Algorithm (UBA) in [BIDI]. The directionality for the cell is determined by looking at the text direction annotation for the cell, as follows: +

+
    +
  1. If the text direction is ltr then the base direction for the cell content should be set to left-to-right.
  2. +
  3. If the text direction is rtl then the base direction for the cell content should be set to right-to-left.
  4. +
  5. If the text direction is auto then the base direction for the cell content should be set to the direction determined by the first character in the cell with a strong type as defined in [BIDI].
  6. +
+
Note

+ If the textDirection property in metadata has the value "inherit", the text direction annotation for a cell inherits its value from the table direction annotation on the table. +

+

+ When the titles of a column are displayed, these should be displayed in the direction determined by the first character in the title with a strong type as defined in [BIDI]. Titles for the same column in different languages may be displayed in different directions. +

+
+
+

6.5.2 Column and row labelling

+

+ The labelling of columns and rows helps those who are attempting to understand the content of a table to grasp what a particular cell means. Implementations should present appropriate titles for columns, and ensure that the most important information in a row is kept apparent to the user, to aid their understanding. For example: +

+
    +
  • a table presented on the screen might retain certain columns in view so that readers can easily glance at the identifying information in each row
  • +
  • as the user moves focus into a cell, screen readers announce a label for the new column if the user has changed column, or for the new row if the user has changed row
  • +
+

+ When labelling a column, either on the screen or aurally, implementations should use the first available of: +

+
    +
  1. the column's titles in the preferred language of the user, or with an undefined language if there is no title available in a preferred language; there may be multiple such titles in which case all should be announced
  2. +
  3. the column's name
  4. +
  5. the column's number
  6. +
+

+ When labelling a row, either on the screen or aurally, implementations should use the first available of: +

+
    +
  1. the row's titles in the preferred language of the user, or with an undefined language if there is no title available in a preferred language; there may be multiple such titles in which case all should be announced
  2. +
  3. the values of the cells in the row's primary key
  4. +
  5. the row's number
  6. +
+
+
+
+

6.6 Validating Tables

+

+ Validators test whether given tabular data files adhere to the structure defined within a schema. Validators MUST raise errors (and halt processing) and issue warnings (and continue processing) as defined in [tabular-metadata]. In addition, validators MUST raise errors but MAY continue validating in the following situations: +

+ +
+
+

6.7 Converting Tables

+

+ Conversions of tabular data to other formats operate over an annotated table constructed as defined in Annotating Tables in [tabular-metadata]. The mechanics of these conversions to other formats are defined in other specifications such as [csv2json] and [csv2rdf]. +

+

+ Conversion specifications MUST define a default mapping from an annotated table that lacks any annotations (i.e., that is equivalent to an un-annotated table). +

+

+ Conversion specifications MUST use the property value of the propertyUrl of a column as the basis for naming machine-readable fields in the target format, such as the name of the equivalent element or attribute in XML, property in JSON or property URI in RDF. +

+

+ Conversion specifications MAY use any of the annotations found on an annotated table group, table, column, row or cell, including non-core annotations, to adjust the mapping into another format. +

+

+ Conversion specifications MAY define additional annotations, not defined in this specification, which are specifically used when converting to the target format of the conversion. For example, a conversion to XML might specify a http://example.org/conversion/xml/element-or-attribute property on columns that determines whether a particular column is represented through an element or an attribute in the data. +

+
+
+
+

7. Best Practice CSV

This section is non-normative.

+

+ There is no standard for CSV, and there are many variants of CSV used on the web today. This section defines a method for expressing tabular data adhering to the annotated tabular data model in CSV. Authors are encouraged to adhere to the constraints described in this section as implementations should process such CSV files consistently. +

+
Note
+

+ This syntax is not compliant with text/csv as defined in [RFC4180] in that it permits line endings other than CRLF. Supporting LF line endings is important for data formats that are used on non-Windows platforms. However, all files that adhere to [RFC4180]'s definition of CSV meet the constraints described in this section. +

+

+ Developing a standard for CSV is outside the scope of the Working Group. The details here aim to help shape any future standard. +

+
+
+

7.1 Content Type

+

+ The appropriate content type for a CSV file is text/csv. For example, when a CSV file is transmitted via HTTP, the HTTP response should include a Content-Type header with the value text/csv: +

+
Content-Type: text/csv
+        
+
+
+

7.2 Encoding

+

+ CSV files should be encoded using UTF-8, and should be in Unicode Normal Form C as defined in [UAX15]. If a CSV file is not encoded using UTF-8, the encoding should be specified through the charset parameter in the Content-Type header: +

+
Content-Type: text/csv;charset=ISO-8859-1
+        
+
+
+

7.3 Line Endings

+

+ The ends of rows in a CSV file should be CRLF (U+000D U+000A) but may be LF (U+000A). Line endings within escaped cells are not normalised. +

+
+
+

7.4 Lines

+

+ Each line of a CSV file should contain the same number of comma-separated values. +

+

+ Values that contain commas, line endings, or double quotes should be escaped by having the entire value wrapped in double quotes. There should not be whitespace before or after the double quotes. Within these escaped cells, any double quotes should be escaped with two double quotes (""). +

+
+

7.4.1 Headers

+

+ The first line of a CSV file should contain a comma-separated list of names of columns. This is known as the header line and provides titles for the columns. There are no constraints on these titles. +

+

+ If a CSV file does not include a header line, this should be specified using the header parameter of the media type: +

+
Content-Type: text/csv;header=absent
+          
+
+
+ +
+

7.5 Grammar

+

This grammar is a generalization of that defined in [RFC4180] and is included for reference only.

+

The EBNF used here is defined in XML 1.0 [EBNF-NOTATION].

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
[1]csv::= + header + record+ +
[2]header::= + record +
[3]record::= + fields + #x0D? + #x0A +
[4]fields::= + field + ("," fields)* +
[5]field::= + WS* + rawfield + WS* +
[6]rawfield::= + '"' QCHAR* '"' + |SCHAR* +
[7]QCHAR::= + [^"] + |'""' +
[8]SCHAR::= + [^",#x0A#x0D] +
[9]WS::= + [#x20#x09] +
+
+
+
+
+

8. Parsing Tabular Data

This section is non-normative.

+

+ As described in section 7. Best Practice CSV, there may be many formats which an application might interpret into the tabular data model described in section 4. Tabular Data Models, including using different separators or fixed format tables, multiple tables within a single file, or ones that have metadata lines before a table header. +

+
Note

+ Standardizing the parsing of CSV is outside the chartered scope of the Working Group. This non-normative section is intended to help the creators of parsers handle the wide variety of CSV-based formats that they may encounter due to the current lack of standardization of the format. +

+

+ This section describes an algorithm for parsing formats that do not adhere to the constraints described in section 7. Best Practice CSV, as well as those that do, and extracting embedded metadata. The parsing algorithm uses the following flags. These may be set by metadata properties found while Locating Metadata, including through user input (see Overriding Metadata), or through the inclusion of a dialect description within a metadata file: +

+
+
comment prefix
+
A string that, when it appears at the beginning of a row, indicates that the row is a comment that should be associated as an rdfs:comment annotation to the table. This is set by the commentPrefix property of a dialect description. The default is null, which means no rows are treated as comments. A value other than null may mean that the source numbers of rows are different from their numbers.
+
delimiter
+
The separator between cells, set by the delimiter property of a dialect description. The default is ,.
+
encoding
+
The character encoding for the file, one of the encodings listed in [encoding], set by the encoding property of a dialect description. The default is utf-8.
+
escape character
+
The string that is used to escape the quote character within escaped cells, or null, set by the doubleQuote property of a dialect description. The default is " (such that "" is used to escape " within an escaped cell).
+
header row count
+
The number of header rows (following the skipped rows) in the file, set by the header or headerRowCount property of a dialect description. The default is 1. A value other than 0 will mean that the source numbers of rows will be different from their numbers.
+
line terminators
+
The strings that can be used at the end of a row, set by the lineTerminators property of a dialect description. The default is [CRLF, LF].
+
quote character
+
The string that is used around escaped cells, or null, set by the quoteChar property of a dialect description. The default is ".
+
skip blank rows
+
Indicates whether to ignore wholly empty rows (i.e. rows in which all the cells are empty), set by the skipBlankRows property of a dialect description. The default is false. A value other than false may mean that the source numbers of rows are different from their numbers.
+
skip columns
+
The number of columns to skip at the beginning of each row, set by the skipColumns property of a dialect description. The default is 0. A value other than 0 will mean that the source numbers of columns will be different from their numbers.
+
skip rows
+
The number of rows to skip at the beginning of the file, before a header row or tabular data, set by the skipRows property of a dialect description. The default is 0. A value greater than 0 will mean that the source numbers of rows will be different from their numbers.
+
trim
+
Indicates whether to trim whitespace around cells; may be true, false, start, or end, set by the skipInitialSpace or trim property of a dialect description. The default is true.
+
+

+ The algorithm for using these flags to parse a document containing tabular data to create a basic annotated tabular data model and to extract embedded metadata is as follows: +

+
    +
  1. + Create a new table T with the annotations: + +
  2. +
  3. + Create a metadata document structure M that looks like: +
    {
    +  "@context": "http://www.w3.org/ns/csvw",
    +  "rdfs:comment": [],
    +  "tableSchema": {
    +    "columns": []
    +  }
    +}
    +          
    +
  4. +
  5. + If the URL of the tabular data file being parsed is known, set the url property on M to that URL. +
  6. +
  7. + Set source row number to 1. +
  8. +
  9. +

    + Read the file using the encoding, as specified in [encoding], using the replacement error mode. If the encoding is not a Unicode encoding, use a normalizing transcoder to normalize into Unicode Normal Form C as defined in [UAX15]. +

    +
    Note

    + The replacement error mode ensures that any non-Unicode characters within the CSV file are replaced by U+FFFD, ensuring that strings within the tabular data model such as column titles and cell string values only contain valid Unicode characters. +

    +
  10. +
  11. + Repeat the following the number of times indicated by skip rows: +
      +
    1. Read a row to provide the row content.
    2. +
    3. If the comment prefix is not null and the row content begins with the comment prefix, strip that prefix from the row content, and add the resulting string to the M.rdfs:comment array.
    4. +
    5. Otherwise, if the row content is not an empty string, add the row content to the M.rdfs:comment array.
    6. +
    7. Add 1 to the source row number.
    8. +
    +
  12. +
  13. + Repeat the following the number of times indicated by header row count: +
      +
    1. Read a row to provide the row content.
    2. +
    3. If the comment prefix is not null and the row content begins with the comment prefix, strip that prefix from the row content, and add the resulting string to the M.rdfs:comment array.
    4. +
    5. Otherwise, parse the row to provide a list of cell values, and: +
        +
      1. Remove the first skip columns number of values from the list of cell values.
      2. +
      3. For each of the remaining values at index i in the list of cell values: +
          +
        1. If the value at index i in the list of cell values is an empty string or consists only of whitespace, do nothing.
        2. +
        3. Otherwise, if there is no column description object at index i in M.tableSchema.columns, create a new one with a titles property whose value is an array containing a single value that is the value at index i in the list of cell values.
        4. +
        5. Otherwise, add the value at index i in the list of cell values to the array at M.tableSchema.columns[i].titles.
        6. +
        +
      4. +
      +
    6. +
    7. Add 1 to the source row number.
    8. +
    +
  14. +
  15. + If header row count is zero, create an empty column description object in M.tableSchema.columns for each column in the current row after skip columns. +
  16. +
  17. Set row number to 1.
  18. +
  19. + While it is possible to read another row, do the following: +
      +
    1. Set the source column number to 1.
    2. +
    3. Read a row to provide the row content.
    4. +
    5. If the comment prefix is not null and the row content begins with the comment prefix, strip that prefix from the row content, and add the resulting string to the M.rdfs:comment array.
    6. +
    7. Otherwise, parse the row to provide a list of cell values, and: +
        +
      1. If all of the values in the list of cell values are empty strings, and skip blank rows is true, add 1 to the source row number and move on to process the next row.
      2. +
      3. Otherwise, create a new row R, with: + +
      4. +
      5. Append R to the rows of table T.
      6. +
      7. Remove the first skip columns number of values from the list of cell values and add that number to the source column number.
      8. +
      9. For each of the remaining values at index i in the list of cell values (where i starts at 1): +
          +
        1. Identify the column C at index i within the columns of table T. If there is no such column: +
            +
          1. Create a new column C with: + +
          2. +
          3. Append C to the columns of table T (at index i).
          4. +
          +
        2. +
        3. Create a new cell D, with: + +
        4. +
        5. Append cell D to the cells of column C.
        6. +
        7. Append cell D to the cells of row R (at index i).
        8. +
        9. Add 1 to the source column number.
        10. +
        +
      10. +
      +
    8. +
    9. Add 1 to the source row number.
    10. +
    +
  20. +
  21. If M.rdfs:comment is an empty array, remove the rdfs:comment property from M.
  22. +
  23. Return the table T and the embedded metadata M.
  24. +
+

+ To read a row to provide row content, perform the following steps: +

+
    +
  1. Set the row content to an empty string.
  2. +
  3. Read initial characters and process as follows: +
      +
    1. If the string starts with the escape character followed by the quote character, append both strings to the row content, and move on to process the string following the quote character.
    2. +
    3. Otherwise, if the string starts with the escape character and the escape character is not the same as the quote character, append the escape character and the single character following it to the row content and move on to process the string following that character.
    4. +
    5. Otherwise, if the string starts with the quote character, append the quoted value obtained by reading a quoted value to the row content and move on to process the string following the quoted value.
    6. +
    7. Otherwise, if the string starts with one of the line terminators, return the row content.
    8. +
    9. Otherwise, append the first character to the row content and move on to process the string following that character.
    10. +
    +
  4. +
  5. If there are no more characters to read, return the row content.
  6. +
+

+ To read a quoted value to provide a quoted value, perform the following steps: +

+
    +
  1. Set the quoted value to an empty string.
  2. +
  3. Read the initial quote character and add a quote character to the quoted value.
  4. +
  5. Read initial characters and process as follows: +
      +
    1. If the string starts with the escape character followed by the quote character, append both strings to the quoted value, and move on to process the string following the quote character.
    2. +
    3. Otherwise, if the string starts with the escape character and the escape character is not the same as the quote character, append the escape character and the character following it to the quoted value and move on to process the string following that character.
    4. +
    5. Otherwise, if the string starts with the quote character, return the quoted value.
    6. +
    7. Otherwise, append the first character to the quoted value and move on to process the string following that character.
    8. +
    +
  6. +
+

+ To parse a row to provide a list of cell values, perform the following steps: +

+
    +
  1. Set the list of cell values to an empty list and the current cell value to an empty string.
  2. +
  3. Set the quoted flag to false.
  4. +
  5. Read initial characters and process as follows: +
      +
    1. If the string starts with the escape character followed by the quote character, append the quote character to the current cell value, and move on to process the string following the quote character.
    2. +
    3. Otherwise, if the string starts with the escape character and the escape character is not the same as the quote character, append the character following the escape character to the current cell value and move on to process the string following that character.
    4. +
    5. Otherwise, if the string starts with the quote character then: +
        +
      1. If quoted is false, set the quoted flag to true, and move on to process the remaining string. If the current cell value is not an empty string, raise an error.
      2. +
      3. Otherwise, set quoted to false, and move on to process the remaining string. If the remaining string does not start with the delimiter, raise an error.
      4. +
      +
    6. +
    7. Otherwise, if the string starts with the delimiter, then: +
        +
      1. If quoted is true, append the delimiter string to the current cell value and move on to process the remaining string.
      2. +
      3. Otherwise, conditionally trim the current cell value, add the resulting trimmed cell value to the list of cell values and move on to process the following string.
      4. +
      +
    8. +
    9. Otherwise, append the first character to the current cell value and move on to process the remaining string.
    10. +
    +
  6. +
  7. If there are no more characters to read, conditionally trim the current cell value, add the resulting trimmed cell value to the list of cell values and return the list of cell values.
  8. +
+

+ To conditionally trim a cell value to provide a trimmed cell value, perform the following steps:

+
    +
  1. Set the trimmed cell value to the provided cell value.
  2. +
  3. If trim is true or start then remove any leading whitespace from the start of the trimmed cell value and move on to the next step.
  4. +
  5. If trim is true or end then remove any trailing whitespace from the end of the trimmed cell value and move on to the next step.
  6. +
  7. Return the trimmed cell value.
  8. +
+
Note

+ This parsing algorithm does not account for the possibility of there being more than one area of tabular data within a single CSV file. +

+
+

8.1 Bidirectionality in CSV Files

This section is non-normative.

+

+ Bidirectional content does not alter the definition of rows or the assignment of cells to columns. Whether or not a CSV file contains right-to-left characters, the first column's content is the first cell of each row, which is the text prior to the first occurrence of a comma within that row. +

+
+

+ For example, Egyptian Referendum results are available as a CSV file at https://egelections-2011.appspot.com/Referendum2012/results/csv/EG.csv. Over the wire and in non-Unicode-aware text editors, the CSV looks like: +

+
            
+‌ا‌ل‌م‌ح‌ا‌ف‌ظ‌ة‌,‌ن‌س‌ب‌ة‌ ‌م‌و‌ا‌ف‌ق‌,‌ن‌س‌ب‌ة‌ ‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌,‌ع‌د‌د‌ ‌ا‌ل‌ن‌ا‌خ‌ب‌ي‌ن‌,‌ا‌ل‌أ‌ص‌و‌ا‌ت‌ ‌ا‌ل‌ص‌ح‌ي‌ح‌ة‌,‌ا‌ل‌أ‌ص‌و‌ا‌ت‌ ‌ا‌ل‌ب‌ا‌ط‌ل‌ة‌,‌ن‌س‌ب‌ة‌ ‌ا‌ل‌م‌ش‌ا‌ر‌ك‌ة‌,‌م‌و‌ا‌ف‌ق‌,‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌
+‌ا‌ل‌ق‌ل‌ي‌و‌ب‌ي‌ة‌,60.0,40.0,"2,639,808","853,125","15,224",32.9,"512,055","341,070"
+‌ا‌ل‌ج‌ي‌ز‌ة‌,66.7,33.3,"4,383,701","1,493,092","24,105",34.6,"995,417","497,675"
+‌ا‌ل‌ق‌ا‌ه‌ر‌ة‌,43.2,56.8,"6,580,478","2,254,698","36,342",34.8,"974,371","1,280,327"
+‌ق‌ن‌ا‌,84.5,15.5,"1,629,713","364,509","6,743",22.8,"307,839","56,670"
+...
+            
+          
+

+ Within this CSV file, the first column appears as the content of each line before the first comma and is named المحافظة (appearing at the start of each row as ‌ا‌ل‌م‌ح‌ا‌ف‌ظ‌ة‌ in the example, which is displaying the relevant characters from left to right in the order they appear "on the wire"). +

+

+ The CSV translates to a table model that looks like: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Column / Rowcolumn 1column 2column 3column 4column 5column 6column 7column 8column 9
column namesالمحافظةنسبة موافقنسبة غير موافقعدد الناخبينالأصوات الصحيحةالأصوات الباطلةنسبة المشاركةموافقغير موافق
row 1القليوبية60.040.02,639,808853,12515,22432.9512,055341,070
row 2الجيزة66.733.34,383,7011,493,09224,10534.6995,417497,675
row 3القاهرة43.256.86,580,4782,254,69836,34234.8974,3711,280,327
row 4قنا84.515.51,629,713364,5096,74322.8307,83956,670
+

+ The fragment identifier #col=3 identifies the third of the columns, named نسبة غير موافق (appearing as ‌ن‌س‌ب‌ة‌ ‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌ in the example). +

+

+ section 6.5.1 Bidirectional Tables defines how this table model should be displayed by compliant applications, and how metadata can affect the display. The default is for the display to be determined by the content of the table. For example, if this CSV were turned into an HTML table for display into a web page, it should be displayed with the first column on the right and the last on the left, as follows: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
غير موافقموافقنسبة المشاركةالأصوات الباطلةالأصوات الصحيحةعدد الناخبيننسبة غير موافقنسبة موافقالمحافظة
341,070512,05532.915,224853,1252,639,80840.060.0القليوبية
497,675995,41734.624,1051,493,0924,383,70133.366.7الجيزة
1,280,327974,37134.836,3422,254,6986,580,47856.843.2القاهرة
56,670307,83922.86,743364,5091,629,71315.584.5قنا
+

+ The fragment identifier #col=3 still identifies the third of the columns, named نسبة غير موافق, which appears in the HTML display as the third column from the right and is what those who read right-to-left would think of as the third column. +

+

+ Note that this display matches that shown on the original website. +

+
+
+
+

8.2 Examples

+
+

8.2.1 Simple Example

+

+ A simple CSV file that complies with the constraints described in section 7. Best Practice CSV, at http://example.org/tree-ops.csv, might look like: +

+
Example 14: http://example.org/tree-ops.csv
GID,On Street,Species,Trim Cycle,Inventory Date
+1,ADDISON AV,Celtis australis,Large Tree Routine Prune,10/18/2010
+2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010
+

+ Parsing this file results in an annotated tabular data model of a single table T with five columns and two rows. The columns have the annotations shown in the following table: +

+ + + + + + + + + + + + +
idcore annotations
tablenumbersource numbercellstitles
C1T11C1.1, C2.1GID
C2T22C1.2, C2.2On Street
C3T33C1.3, C2.3Species
C4T44C1.4, C2.4Trim Cycle
C5T55C1.5, C2.5Inventory Date
+

The extracted embedded metadata, as defined in [tabular-metadata], would look like:

+
Example 15: tree-ops.csv Embedded Metadata
{
+  "@type": "Table",
+  "url": "http://example.org/tree-ops.csv",
+  "tableSchema": {
+    "columns": [
+      {"titles": [ "GID" ]},
+      {"titles": [ "On Street" ]},
+      {"titles": [ "Species" ]},
+      {"titles": [ "Trim Cycle" ]},
+      {"titles": [ "Inventory Date" ]}
+    ]
+  }
+}
+

+ The rows have the annotations shown in the following table: +

+ + + + + + + + + +
idcore annotations
tablenumbersource numbercells
R1T12C1.1, C1.2, C1.3, C1.4, C1.5
R2T23C2.1, C2.2, C2.3, C2.4, C2.5
+
Note

+ The source number of each row is offset by one from the number of each row because in the source CSV file, the header line is the first line. It is possible to reconstruct a [RFC7111] compliant reference to the first record in the original CSV file (http://example.org/tree-ops.csv#row=2) using the value of the row's source number. This enables implementations to retain provenance between the table model and the original file. +

+

+ The cells have the annotations shown in the following table (note that the values of all the cells in the table are strings, denoted by the double quotes in the table below): +

+ + + + + + + + + + + + + + + + + +
idcore annotations
tablecolumnrowstring valuevalue
C1.1TC1R1"1""1"
C1.2TC2R1"ADDISON AV""ADDISON AV"
C1.3TC3R1"Celtis australis""Celtis australis"
C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"
C1.5TC5R1"10/18/2010""10/18/2010"
C2.1TC1R2"2""2"
C2.2TC2R2"EMERSON ST""EMERSON ST"
C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"
C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"
C2.5TC5R2"6/2/2010""6/2/2010"
+
+
8.2.1.1 Using Overriding Metadata
+

+ The tools that the consumer of this data uses may provide a mechanism for overriding the metadata that has been provided within the file itself. For example, they might enable the consumer to add machine-readable names to the columns, or to mark the fifth column as holding a date in the format M/D/YYYY. These facilities are implementation defined; the code for invoking a Javascript-based parser might look like: +

+
Example 16: Javascript implementation configuration
data.parse({
+  "column-names": ["GID", "on_street", "species", "trim_cycle", "inventory_date"],
+  "datatypes": ["string", "string", "string", "string", "date"],
+  "formats": [null,null,null,null,"M/D/YYYY"]
+});
+

+ This is equivalent to a metadata file expressed in the syntax defined in [tabular-metadata], looking like: +

+
Example 17: Equivalent metadata syntax
{
+  "@type": "Table",
+  "url": "http://example.org/tree-ops.csv",
+  "tableSchema": {
+    "columns": [{
+      "name": "GID",
+      "datatype": "string"
+    }, {
+      "name": "on_street",
+      "datatype": "string"
+    }, {
+      "name": "species",
+      "datatype": "string"
+    }, {
+      "name": "trim_cycle",
+      "datatype": "string"
+    }, {
+      "name": "inventory_date",
+      "datatype": {
+        "base": "date",
+        "format": "M/d/yyyy"
+      }
+    }]
+  }
+}
+

+ This would be merged with the embedded metadata found in the CSV file, providing the titles for the columns to create: +

+
Example 18: Merged metadata
{
+  "@type": "Table",
+  "url": "http://example.org/tree-ops.csv",
+  "tableSchema": {
+    "columns": [{
+      "name": "GID",
+      "titles": "GID",
+      "datatype": "string"
+    }, {
+      "name": "on_street",
+      "titles": "On Street",
+      "datatype": "string"
+    }, {
+      "name": "species",
+      "titles": "Species",
+      "datatype": "string"
+    }, {
+      "name": "trim_cycle",
+      "titles": "Trim Cycle",
+      "datatype": "string"
+    }, {
+      "name": "inventory_date",
+      "titles": "Inventory Date",
+      "datatype": {
+        "base": "date",
+        "format": "M/d/yyyy"
+      }
+    }]
+  }
+}
+

+ The processor can then create an annotated tabular data model that includes name annotations on the columns and datatype annotations on the cells, and creates cells whose values are of appropriate types (in the case of this Javascript implementation, the cells in the last column would be Date objects, for example). +

+

+ Assuming this kind of implementation-defined parsing, the columns would then have the annotations shown in the following table: +

+ + + + + + + + + + + + +
idcore annotations
tablenumbersource numbercellsnametitlesdatatype
C1T11C1.1, C2.1GIDGIDstring
C2T22C1.2, C2.2on_streetOn Streetstring
C3T33C1.3, C2.3speciesSpeciesstring
C4T44C1.4, C2.4trim_cycleTrim Cyclestring
C5T55C1.5, C2.5inventory_dateInventory Date{ "base": "date", "format": "M/d/yyyy" }
+

+ The cells have the annotations shown in the following table. Because of the overrides provided by the consumer to guide the parsing, and the way the parser works, the cells in the Inventory Date column (cells C1.5 and C2.5) have values that are parsed dates rather than unparsed strings. +

+ + + + + + + + + + + + + + + + + +
idcore annotations
tablecolumnrowstring valuevalue
C1.1TC1R1"1""1"
C1.2TC2R1"ADDISON AV""ADDISON AV"
C1.3TC3R1"Celtis australis""Celtis australis"
C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"
C1.5TC5R1"10/18/2010"2010-10-18
C2.1TC1R2"2""2"
C2.2TC2R2"EMERSON ST""EMERSON ST"
C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"
C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"
C2.5TC5R2"6/2/2010"2010-06-02
+
+
+
8.2.1.2 Using a Metadata File
+

+ A similar set of annotations could be provided through a metadata file, located as discussed in section 5. Locating Metadata and defined in [tabular-metadata]. For example, this might look like: +

+
Example 19: http://example.org/tree-ops.csv-metadata.json
{
+  "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}],
+  "url": "tree-ops.csv",
+  "dc:title": "Tree Operations",
+  "dcat:keyword": ["tree", "street", "maintenance"],
+  "dc:publisher": {
+    "schema:name": "Example Municipality",
+    "schema:url": {"@id": "http://example.org"}
+  },
+  "dc:license": {"@id": "http://opendefinition.org/licenses/cc-by/"},
+  "dc:modified": {"@value": "2010-12-31", "@type": "xsd:date"},
+  "tableSchema": {
+    "columns": [{
+      "name": "GID",
+      "titles": ["GID", "Generic Identifier"],
+      "dc:description": "An identifier for the operation on a tree.",
+      "datatype": "string",
+      "required": true
+    }, {
+      "name": "on_street",
+      "titles": "On Street",
+      "dc:description": "The street that the tree is on.",
+      "datatype": "string"
+    }, {
+      "name": "species",
+      "titles": "Species",
+      "dc:description": "The species of the tree.",
+      "datatype": "string"
+    }, {
+      "name": "trim_cycle",
+      "titles": "Trim Cycle",
+      "dc:description": "The operation performed on the tree.",
+      "datatype": "string"
+    }, {
+      "name": "inventory_date",
+      "titles": "Inventory Date",
+      "dc:description": "The date of the operation that was performed.",
+      "datatype": {"base": "date", "format": "M/d/yyyy"}
+    }],
+    "primaryKey": "GID",
+    "aboutUrl": "#gid-{GID}"
+  }
+}
+

+ The annotated tabular data model generated from this would be more sophisticated again. The table itself would have the following annotations: +

+
+
dc:title
+
{"@value": "Tree Operations", "@language": "en"}
+
dcat:keyword
+
[{"@value": "tree", "@language": "en"}, {"@value": "street", "@language": "en"}, {"@value": "maintenance", "@language": "en"}]
+
dc:publisher
+
[{ "schema:name": "Example Municipality", "schema:url": {"@id": "http://example.org"} }]
+
dc:license
+
{"@id": "http://opendefinition.org/licenses/cc-by/"}
+
dc:modified
+
{"@value": "2010-12-31", "@type": "date"}
+
+

+ The columns would have the annotations shown in the following table: +

+ + + + + + + + + + + + +
idcore annotationsother annotations
tablenumbersource numbercellsnametitlesdatatypedc:description
C1T11C1.1, C2.1GIDGID, Generic IdentifierstringAn identifier for the operation on a tree.
C2T22C1.2, C2.2on_streetOn StreetstringThe street that the tree is on.
C3T33C1.3, C2.3speciesSpeciesstringThe species of the tree.
C4T44C1.4, C2.4trim_cycleTrim CyclestringThe operation performed on the tree.
C5T55C1.5, C2.5inventory_dateInventory Date{ "base": "date", "format": "M/d/yyyy" }The date of the operation that was performed.
+

+ The rows have an additional primary key annotation, as shown in the following table: +

+ + + + + + + + + +
idcore annotations
tablenumbersource numbercellsprimary key
R1T12C1.1, C1.2, C1.3, C1.4, C1.5C1.1
R2T23C2.1, C2.2, C2.3, C2.4, C2.5C2.1
+

+ Thanks to the provided metadata, the cells again have the annotations shown in the following table. The metadata file has provided the information to supplement the model with additional annotations and also, for the Inventory Date column (cells C1.5 and C2.5), to give the cells a value that is a parsed date rather than an unparsed string. +

+ + + + + + + + + + + + + + + + + +
idcore annotations
tablecolumnrowstring valuevalueabout URL
C1.1TC1R1"1""1"http://example.org/tree-ops.csv#gid-1
C1.2TC2R1"ADDISON AV""ADDISON AV"http://example.org/tree-ops.csv#gid-1
C1.3TC3R1"Celtis australis""Celtis australis"http://example.org/tree-ops.csv#gid-1
C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"http://example.org/tree-ops.csv#gid-1
C1.5TC5R1"10/18/2010"2010-10-18http://example.org/tree-ops.csv#gid-1
C2.1TC1R2"2""2"http://example.org/tree-ops.csv#gid-2
C2.2TC2R2"EMERSON ST""EMERSON ST"http://example.org/tree-ops.csv#gid-2
C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"http://example.org/tree-ops.csv#gid-2
C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"http://example.org/tree-ops.csv#gid-2
C2.5TC5R2"6/2/2010"2010-06-02http://example.org/tree-ops.csv#gid-2
+
+
+
+

8.2.2 Empty and Quoted Cells

+

+ The following slightly amended CSV file contains quoted and missing cell values: +

+
Example 20: CSV file containing quoted and missing cell values
GID,On Street,Species,Trim Cycle,Inventory Date
+1,ADDISON AV,"Celtis australis","Large Tree Routine Prune",10/18/2010
+2,,"Liquidambar styraciflua","Large Tree Routine Prune",
+

+ Parsing this file similarly results in an annotated tabular data model of a single table T with five columns and two rows. The columns and rows have exactly the same annotations as previously, but there are two null cell values for C2.2 and C2.5. Note that the quoting of values within the CSV makes no difference to either the string value or value of the cell. +

+ + + + + + + + + + + + + + + + + +
idcore annotations
tablecolumnrowstring valuevalue
C1.1TC1R1"1""1"
C1.2TC2R1"ADDISON AV""ADDISON AV"
C1.3TC3R1"Celtis australis""Celtis australis"
C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"
C1.5TC5R1"10/18/2010""10/18/2010"
C2.1TC1R2"2""2"
C2.2TC2R2""null
C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"
C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"
C2.5TC5R2""null
+
+
+

8.2.3 Tabular Data Embedding Annotations

+

+ The following example illustrates some of the complexities that can be involved in parsing tabular data, how the flags described above can be used, and how new tabular data formats could be defined that embed additional annotations into the tabular data model. +

+

+ In this example, the publishers of the data are using an internal convention to supply additional metadata about the tabular data embedded within the file itself. They are also using a tab as a separator rather than a comma. +

+
Example 21: Tab-separated file containing embedded metadata
#	publisher	City of Palo Alto
+#	updated	12/31/2010
+#name	GID	on_street	species	trim_cycle	inventory_date
+#datatype	string	string	string	string	date:M/D/YYYY
+	GID	On Street	Species	Trim Cycle	Inventory Date
+	1	ADDISON AV	Celtis australis	Large Tree Routine Prune	10/18/2010
+	2	EMERSON ST	Liquidambar styraciflua	Large Tree Routine Prune	6/2/2010
+
+
8.2.3.1 Naive Parsing
+

+ Naive parsing of the above data will assume a comma separator and thus results in a single table T with a single column and six rows. The column has the annotations shown in the following table: +

+ + + + + + + + +
idcore annotations
tablenumbersource numbercellstitles
C1T11C1.1, C2.1, C3.1, C4.1, C5.1, C6.1# publisher City of Palo Alto
+

+ The rows have the annotations shown in the following table: +

+ + + + + + + + + + + + + +
idcore annotations
tablenumbersource numbercells
R1T12C1.1
R2T23C2.1
R3T34C3.1
R4T45C4.1
R5T56C5.1
R6T67C6.1
+

+ The cells have the annotations shown in the following table (note that the values of all the cells in the table are strings, denoted by the double quotes in the table below): +

+ + + + + + + + + + + + + +
idcore annotations
tablecolumnrowstring valuevalue
C1.1TC1R1"# updated 12/31/2010""# updated 12/31/2010"
C2.1TC1R2"#name GID on_street species trim_cycle inventory_date""#name GID on_street species trim_cycle inventory_date"
C3.1TC1R3"#datatype string string string string date:M/D/YYYY""#datatype string string string string date:M/D/YYYY"
C4.1TC1R4" GID On Street Species Trim Cycle Inventory Date"" GID On Street Species Trim Cycle Inventory Date"
C5.1TC1R5" 1 ADDISON AV Celtis australis Large Tree Routine Prune 10/18/2010"" 1 ADDISON AV Celtis australis Large Tree Routine Prune 10/18/2010"
C6.1TC1R6" 2 EMERSON ST Liquidambar styraciflua Large Tree Routine Prune 6/2/2010"" 2 EMERSON ST Liquidambar styraciflua Large Tree Routine Prune 6/2/2010"
+
+
+
8.2.3.2 Parsing with Flags
+

+ The consumer of the data may use the flags described above to create a more useful set of data from this file. Specifically, they could set: +

+ +

+ Setting these is done in an implementation-defined way. It could be done, for example, by sniffing the contents of the file itself, through command-line options, or by embedding a dialect description into a metadata file associated with the tabular data, which would look like: +

+
Example 22: Dialect description
{
+  "delimiter": "\t",
+  "skipRows": 4,
+  "skipColumns": 1,
+  "commentPrefix": "#"
+}
+

+ With these flags in operation, parsing this file results in an annotated tabular data model of a single table T with five columns and two rows which is largely the same as that created from the original simple example described in section 8.2.1 Simple Example. There are three differences. +

+

+ First, because the four skipped rows began with the comment prefix, the table itself now has four rdfs:comment annotations, with the values: +

+
    +
  1. publisher City of Palo Alto
  2. +
  3. updated 12/31/2010
  4. +
  5. name GID on_street species trim_cycle inventory_date
  6. +
  7. datatype string string string string date:M/D/YYYY
  8. +
+

+ Second, because the first column has been skipped, the source number of each of the columns is offset by one from the number of each column: +

+ + + + + + + + + + + + +
idcore annotations
tablenumbersource numbercellstitles
C1T12C1.1, C2.1GID
C2T23C1.2, C2.2On Street
C3T34C1.3, C2.3Species
C4T45C1.4, C2.4Trim Cycle
C5T56C1.5, C2.5Inventory Date
+

+ Finally, because four additional rows have been skipped, the source number of each of the rows is offset by five from the row number (the four skipped rows plus the single header row): +

+ + + + + + + + + +
idcore annotations
tablenumbersource numbercells
R1T16C1.1, C1.2, C1.3, C1.4, C1.5
R2T27C2.1, C2.2, C2.3, C2.4, C2.5
+
+
+
8.2.3.3 Recognizing Tabular Data Formats
+

+ The conventions used in this data (invented for the purpose of this example) are in fact intended to create an annotated tabular data model which includes named annotations on the table itself, on the columns, and on the cells. The creator of these conventions could create a specification for this particular tabular data syntax and register a media type for it. The specification would include statements like: +

+
    +
  • A tab delimiter is always used.
  • +
  • The first column is always ignored.
  • +
  • When the first column of a row has the value "#", the second column is the name of an annotation on the table and the values of the remaining columns are concatenated to create the value of that annotation.
  • +
  • When the first column of a row has the value #name, the remaining cells in the row provide a name annotation for each column in the table.
  • +
  • When the first column of a row has the value #datatype, the remaining cells in the row provide datatype/format annotations for the cells within the relevant column, and these are interpreted to create the value for each cell in that column.
  • +
  • The first row where the first column is empty is a row of headers; these provide title annotations on the columns in the table.
  • +
  • The remaining rows make up the data of the table.
  • +
+

+ Parsers that recognized the format could then build a more sophisticated annotated tabular data model using only the embedded information in the tabular data file. They would extract embedded metadata looking like: +

+
Example 23: Embedded metadata in the format of the annotated tabular model
{
+  "@context": "http://www.w3.org/ns/csvw",
+  "url": "tree-ops.csv",
+  "dc:publisher": "City of Palo Alto",
+  "dc:updated": "12/31/2010",
+  "tableSchema": {
+    "columns": [{
+      "name": "GID",
+      "titles": "GID",
+      "datatype": "string"
+    }, {
+      "name": "on_street",
+      "titles": "On Street",
+      "datatype": "string"
+    }, {
+      "name": "species",
+      "titles": "Species",
+      "datatype": "string"
+    }, {
+      "name": "trim_cycle",
+      "titles": "Trim Cycle",
+      "datatype": "string"
+    }, {
+      "name": "inventory_date",
+      "titles": "Inventory Date",
+      "datatype": {
+        "base": "date",
+        "format": "M/d/yyyy"
+      }
+    }]
+  }
+}
+

+ As before, the result would be a single table T with five columns and two rows. The table itself would have two annotations: +

+
+
dc:publisher
+
{"@value": "City of Palo Alto"}
+
dc:updated
+
{"@value": "12/31/2010"}
+
+

+ The columns have the annotations shown in the following table: +

+ + + + + + + + + + + + +
idcore annotations
tablenumbersource numbercellsnametitles
C1T12C1.1, C2.1GIDGID
C2T23C1.2, C2.2on_streetOn Street
C3T34C1.3, C2.3speciesSpecies
C4T45C1.4, C2.4trim_cycleTrim Cycle
C5T56C1.5, C2.5inventory_dateInventory Date
+

+ The rows have the annotations shown in the following table, exactly as in previous examples: +

+ + + + + + + + + +
idcore annotations
tablenumbersource numbercells
R1T16C1.1, C1.2, C1.3, C1.4, C1.5
R2T27C2.1, C2.2, C2.3, C2.4, C2.5
+

+ The cells have the annotations shown in the following table. Because of the way the particular tabular data format has been specified, these include additional annotations but also, for the Inventory Date column (cells C1.5 and C2.5), have a value that is a parsed date rather than an unparsed string. +

+ + + + + + + + + + + + + + + + + +
idcore annotations
tablecolumnrowstring valuevalue
C1.1TC1R1"1""1"
C1.2TC2R1"ADDISON AV""ADDISON AV"
C1.3TC3R1"Celtis australis""Celtis australis"
C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"
C1.5TC5R1"10/18/2010"2010-10-18
C2.1TC1R2"2""2"
C2.2TC2R2"EMERSON ST""EMERSON ST"
C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"
C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"
C2.5TC5R2"6/2/2010"2010-06-02
+
+
+
+

8.2.4 Parsing Multiple Header Lines

+

+ The following example shows a CSV file with multiple header lines: +

+
Example 24: CSV file with multiple header lines
Who,What,,Where,
+Organization,Sector,Subsector,Department,Municipality
+#org,#sector,#subsector,#adm1,#adm2
+UNICEF,Education,Teacher training,Chocó,Quidbó
+UNICEF,Education,Teacher training,Chocó,Bojayá
+

+ Here, the first line contains some grouping titles, which are not particularly helpful. The lines following those contain useful titles for the columns. Thus the appropriate configuration for a dialect description is: +

+
Example 25: Dialect description for multiple header lines
{
+  "skipRows": 1,
+  "headerRowCount": 2
+}
+

+ With this configuration, the table model contains five columns, each of which has two titles, summarized in the following table: +

+ + + + + + + + + + + + +
idcore annotations
tablenumbersource numbercellstitles
C1T11C1.1, C2.1Organization, #org
C2T22C1.2, C2.2Sector, #sector
C3T33C1.3, C2.3Subsector, #subsector
C4T44C1.4, C2.4Department, #adm1
C5T55C1.5, C2.5Municipality, #adm2
+

+ As metadata, this would look like: +

+
Example 26: Extracted metadata
{
+  "tableSchema": {
+    "columns": [
+      { "titles": ["Organization", "#org"] },
+      { "titles": ["Sector", "#sector"] },
+      { "titles": ["Subsector", "#subsector"] },
+      { "titles": ["Department", "#adm1"] },
+      { "titles": ["Municipality", "#adm2"] }
+    ]
+  }
+}
+

+ A separate metadata file could contain just the second of each of these titles, for example: +

+
Example 27: Metadata file
{
+  "tableSchema": {
+    "columns": [
+      { "name": "org", "titles": "#org" },
+      { "name": "sector", "titles": "#sector" },
+      { "name": "subsector", "titles": "#subsector" },
+      { "name": "adm1", "titles": "#adm1" },
+      { "name": "adm2", "titles": "#adm2" }
+    ]
+  }
+}
+

+ This enables people from multiple jurisdictions to use the same tabular data structures without having to use exactly the same titles within their documents. +

+
+
+
+
+

A. IANA Considerations

+ /.well-known/csvm +
+
URI suffix:
+
csvm
+
Change controller:
+
W3C
+
Specification document(s):
+
This document, section 5.3 Default Locations and Site-wide Location Configuration
+
+
+
+

B. Existing Standards

This section is non-normative.

+

+ This appendix outlines various ways in which CSV is defined. +

+
+

B.1 RFC 4180

+

+ [RFC4180] defines CSV with the following ABNF grammar: +

+
file = [header CRLF] record *(CRLF record) [CRLF]
+header = name *(COMMA name)
+record = field *(COMMA field)
+name = field
+field = (escaped / non-escaped)
+escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE
+non-escaped = *TEXTDATA
+COMMA = %x2C
+CR = %x0D
+DQUOTE =  %x22
+LF = %x0A
+CRLF = CR LF
+TEXTDATA =  %x20-21 / %x23-2B / %x2D-7E
+        
+

+ Of particular note here are: +

+
    +
  • The production for TEXTDATA indicates that only non-control ASCII characters are permitted within a CSV file. This restriction is routinely ignored in practice, and is impractical on the international web.
  • +
  • Lines should be ended with CRLF. This makes it harder to produce CSV files on Unix-based systems where the usual line ending is LF.
  • +
  • The header line is optional; a header parameter on the media type indicates whether the header is present or not.
  • +
  • Fields may be escaped by wrapping them in double quotes; any double quotes within the field must be escaped with two double quotes ("").
  • +
+
+
+

B.2 Excel

+

+ Excel is a common tool for both creating and reading CSV documents, and therefore the CSV that it produces is a de facto standard. +

+
Note

+ The following describes the behavior of Microsoft Excel for Mac 2011 with an English locale. Further testing is needed to see the behavior of Excel in other situations. +

+
+

B.2.1 Saved CSV

+

+ Excel generates CSV files encoded using Windows-1252 with LF line endings. Characters that cannot be represented within Windows-1252 are replaced by underscores. Only those cells that need escaping (e.g. because they contain commas or double quotes) are escaped, and double quotes are escaped with two double quotes. +

+

+ Dates and numbers are formatted as displayed, which means that formatting can lead to information being lost or becoming inconsistent. +

+
+
+

B.2.2 Opened CSV

+

+ When opening CSV files, Excel interprets CSV files saved in UTF-8 as being encoded as Windows-1252 (whether or not a BOM is present). It correctly deals with double quoted cells, except that it converts line breaks within cells into spaces. It understands CRLF as a line break. It detects dates (formatted as YYYY-MM-DD) and formats them in the default date formatting for files. +

+
+
+

B.2.3 Imported CSV

+

+ Excel provides more control when importing CSV files into Excel. However, it does not properly understand UTF-8 (with or without BOM). It does however properly understand UTF-16 and can read non-ASCII characters from a UTF-16-encoded file. +

+

+ A particular quirk in the importing of CSV is that if a cell contains a line break, the final double quote that escapes the cell will be included within it. +

+
+
+

B.2.4 Copied Tabular Data

+

+ When tabular data is copied from Excel, it is copied in a tab-delimited format, with LF line breaks. +

+
+
+
+

B.3 Google Spreadsheets

+
+

B.3.1 Downloading CSV

+

+ Downloaded CSV files are encoded in UTF-8, without a BOM, and with LF line endings. Dates and numbers are formatted as they appear within the spreadsheet. +

+
+
+

B.3.2 Importing CSV

+

+ CSV files can be imported as UTF-8 (with or without BOM). CRLF line endings are correctly recognized. Dates are reformatted to the default date format on load. +

+
+
+
+

B.4 CSV Files in a Tabular Data Package

+

+ Tabular Data Packages place the following restrictions on CSV files: +

+
+

As a starting point, CSV files included in a Tabular Data Package must conform to the RFC for CSV (4180 - Common Format and MIME Type for Comma-Separated Values (CSV) Files). In addition:

+ +
    +
  • File names MUST end with .csv.

  • +
  • Files MUST be encoded as UTF-8.

  • +
  • +

    Files MUST have a single header row. This row MUST be the first row in the file.

    +
      +
    • Terminology: each column in the CSV file is termed a field and its name is the string in that column in the header row.

    • +
    • The name MUST be unique amongst fields, MUST contain at least one character, and MUST conform to the character restrictions defined for the name property.

    • +
    +
  • +
  • Rows in the file MUST NOT contain more fields than are in the header row (though they may contain less).

  • +
  • Each file MUST have an entry in the tables array in the datapackage.json file.

  • +
  • The resource metadata MUST include a tableSchema attribute whose value MUST be a valid schema description.

  • +
  • All fields in the CSV files MUST be described in the schema description.

  • +
+ +

CSV files generated by different applications often vary in their syntax, e.g. use of quoting characters, delimiters, etc. To encourage conformance, CSV files in a Tabular Data Package SHOULD:

+ +
    +
  • Use "," as field delimiters.
  • +
  • Use CRLF (U+000D U+000A) or LF (U+000A) as line terminators.
  • +
+ +

If a CSV file does not follow these rules then its specific CSV dialect MUST be documented. The resource + hash for the resource in the datapackage.json descriptor MUST:

+ + + +

Applications processing the CSV file SHOULD use the dialect of the CSV file to guide parsing.

+
+
+
Note
+

+ To replicate the findings above, test files which include non-ASCII characters, double quotes, and line breaks within cells are: +

+ +
+
+
+

C. Acknowledgements

This section is non-normative.

+
At the time of publication, the following individuals had participated in the Working Group, in the order of their first name: + Adam Retter, + Alf Eaton, + Anastasia Dimou, + Andy Seaborne, + Axel Polleres, + Christopher Gutteridge, + Dan Brickley, + Davide Ceolin, + Eric Stephan, + Erik Mannens, + Gregg Kellogg, + Ivan Herman, + Jeni Tennison, + Jeremy Tandy, + Jürgen Umbrich, + Rufus Pollock, + Stasinos Konstantopoulos, + William Ingram, and + Yakov Shafranovich. +
+
+
+

D. Changes from previous drafts

+
+

D.1 Changes since the candidate recommendation of 16 July 2015

+
    +
  • Use text/tab-separated-values instead of the un-registered text/tsv.
  • +
  • /.well-known/csvm has been registered at IANA
  • +
+
+
+

D.2 Changes since the working draft of 16 April 2015

+
    +
  • Merging of metadata files has been removed as it was determined not to be necessary.
  • +
  • Embedded metadata now used for compatibility check only, or as metadata if no other is found.
  • +
  • The titles annotation has been added to rows, and a section added describing the way in which screen readers should announce rows and columns to users
  • +
  • A Datatype description may have an id annotation to reference an external datatype definition in XSD, OWL, or some other format.
  • +
  • Renamed the direction annotation to table direction.
  • +
  • The built-in locations for locating metadata files were removed in favor of a site-wide configuration file, which uses the original values for file-specific and directory-specific metadata locations as the default value. See section 5.3 Default Locations and Site-wide Location Configuration.
  • +
  • The pattern for numeric types is now a number format pattern rather than a regular expression.
  • +
+
+
+

D.3 Changes since the working draft of 08 January 2015

+

The document has undergone substantial changes since the last working draft. Below are some of the changes made:

+ +
+
+ + +

E. References

E.1 Normative references

[BCP47]
A. Phillips; M. Davis. Tags for Identifying Languages. September 2009. IETF Best Current Practice. URL: https://tools.ietf.org/html/bcp47 +
[BIDI]
Mark Davis; Aharon Lanin; Andrew Glass. Unicode Bidirectional Algorithm. 5 June 2014. Unicode Standard Annex #9. URL: http://www.unicode.org/reports/tr9/ +
[ECMASCRIPT]
ECMAScript Language Specification. URL: https://tc39.github.io/ecma262/ +
[ISO8601]
Representation of dates and times. International Organization for Standardization. 2004. ISO 8601:2004. URL: http://www.iso.org/iso/catalogue_detail?csnumber=40874 +
[JSON-LD]
Manu Sporny; Gregg Kellogg; Markus Lanthaler. JSON-LD 1.0. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/json-ld/ +
[RFC2119]
S. Bradner. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 +
[RFC3968]
G. Camarillo. The Internet Assigned Number Authority (IANA) Header Field Parameter Registry for the Session Initiation Protocol (SIP). December 2004. Best Current Practice. URL: https://tools.ietf.org/html/rfc3968 +
[RFC4180]
Y. Shafranovich. Common Format and MIME Type for Comma-Separated Values (CSV) Files. October 2005. Informational. URL: https://tools.ietf.org/html/rfc4180 +
[RFC5785]
M. Nottingham; E. Hammer-Lahav. Defining Well-Known Uniform Resource Identifiers (URIs). April 2010. Proposed Standard. URL: https://tools.ietf.org/html/rfc5785 +
[UAX35]
Mark Davis; CLDR committee members. Unicode Locale Data Markup Language (LDML). 15 March 2013. Unicode Standard Annex #35. URL: http://www.unicode.org/reports/tr35/tr35-31/tr35.html +
[UNICODE]
The Unicode Standard. URL: http://www.unicode.org/versions/latest/ +
[URI-TEMPLATE]
J. Gregorio; R. Fielding; M. Hadley; M. Nottingham; D. Orchard. URI Template. March 2012. Proposed Standard. URL: https://tools.ietf.org/html/rfc6570 +
[tabular-metadata]
Jeni Tennison; Gregg Kellogg. Metadata Vocabulary for Tabular Data. W3C Recommendation. URL: http://www.w3.org/TR/2015/REC-tabular-metadata-20151217/ +
[xmlschema11-2]
David Peterson; Sandy Gao; Ashok Malhotra; Michael Sperberg-McQueen; Henry Thompson; Paul V. Biron et al. W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes. 5 April 2012. W3C Recommendation. URL: http://www.w3.org/TR/xmlschema11-2/ +

E.2 Informative references

[EBNF-NOTATION]
Tim Bray; Jean Paoli; C. Michael Sperberg-McQueen; Eve Maler; François Yergeau. EBNF Notation. W3C Recommendation. URL: http://www.w3.org/TR/xml/#sec-notation +
[RFC7111]
M. Hausenblas; E. Wilde; J. Tennison. URI Fragment Identifiers for the text/csv Media Type. January 2014. Informational. URL: https://tools.ietf.org/html/rfc7111 +
[UAX15]
Mark Davis; Ken Whistler. Unicode Normalization Forms. 31 August 2012. Unicode Standard Annex #15. URL: http://www.unicode.org/reports/tr15 +
[annotation-model]
Robert Sanderson; Paolo Ciccarese; Benjamin Young. Web Annotation Data Model. 15 October 2015. W3C Working Draft. URL: http://www.w3.org/TR/annotation-model/ +
[csv2json]
Jeremy Tandy; Ivan Herman. Generating JSON from Tabular Data on the Web. W3C Recommendation. URL: http://www.w3.org/TR/2015/REC-csv2json-20151217/ +
[csv2rdf]
Jeremy Tandy; Ivan Herman; Gregg Kellogg. Generating RDF from Tabular Data on the Web. W3C Recommendation. URL: http://www.w3.org/TR/2015/REC-csv2rdf-20151217/ +
[encoding]
Anne van Kesteren; Joshua Bell; Addison Phillips. Encoding. 20 October 2015. W3C Candidate Recommendation. URL: http://www.w3.org/TR/encoding/ +
[vocab-data-cube]
Richard Cyganiak; Dave Reynolds. The RDF Data Cube Vocabulary. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/vocab-data-cube/ +
\ No newline at end of file diff --git a/test/docs/metadata/tracking-compliance.html b/test/docs/metadata/tracking-compliance.html new file mode 100644 index 000000000..03a1369c3 --- /dev/null +++ b/test/docs/metadata/tracking-compliance.html @@ -0,0 +1,1116 @@ + + + + Tracking Compliance and Scope + + + + + + + +

Abstract

+

This specification defines a set of practices for compliance with a + user's Do Not Track (DNT) tracking preference to which a server may claim + adherence.

+

Status of This Document

+ + + +

+ This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the W3C technical reports index at http://www.w3.org/TR/. +

+ + + + + +

+ This document was published by the Tracking Protection Working Group as a Last Call Working Draft on 14 July 2015. + + This document is intended to become a W3C Recommendation. + + + If you wish to make comments regarding this document, please send them to + public-tracking-comments@w3.org + (subscribe, + archives). All comments are publicly archived; if you have not used W3C mailing + lists in the past, you will need to approve archiving (instructions are + sent via email auto-reply) before your comments will be distributed. + + The Last Call period ends 07 October 2015. + + + + + All comments are welcome. + + +

+ +

The Tracking Protection Working Group invites broad community review, + especially of technical requirements and dependencies. Reviewers are + encouraged to comment on the extent to which technical requirements of the + group's charter have been met and how significant dependencies with groups + inside and outside W3C have been satisfied. The Working Group will evaluate + all comments received and determine whether or how the specification needs + to be modified in light of the comments. Comments will be most useful in + identifying particular problems with the specification that might inhibit + adoption, or where this specification fails to further goals of user + privacy and user control, and whether this specification creates or does + not otherwise resolve dependencies with other technical standards, + practices, or processes. The Chairs of the Working Group will issue written + responses to all comments received.

+ +

The Working Group previously published a Last Call Working Draft of the + companion Tracking Preference Expression [TPE] specification, which + defines mechanisms for expressing a user's preference regarding tracking + and for communicating whether and how received preferences are honored. + That specification is expected to transition to Candidate Recommendation + shortly.

+ +

Revisions from the previous Working Draft include editorial changes, + removal of separate "tracking data" terminology, and updates to compliance + with expressed tracking preferences and indications of such compliance. + Readers may review + changes from the previous Working Draft.

+ + +

+ Publication as a Last Call Working Draft does not imply endorsement by the W3C + Membership. This is a draft document and may be updated, replaced or obsoleted by other + documents at any time. It is inappropriate to cite this document as other than work in + progress. +

+ + + +

+ This is a Last Call Working Draft and thus the Working Group has determined that this + document has satisfied the relevant technical requirements and is sufficiently stable to + advance through the Technical Recommendation process. +

+ +

+ + This document was produced by a group operating under the + 5 February 2004 W3C Patent + Policy. + + + + + W3C maintains a public list of any patent + disclosures + + made in connection with the deliverables of the group; that page also includes + instructions for disclosing a patent. An individual who has actual knowledge of a patent + which the individual believes contains + Essential + Claim(s) must disclose the information in accordance with + section + 6 of the W3C Patent Policy. + + +

+ +

+ This document is governed by the 14 October 2005 W3C Process Document. +

+ + + + + +

Table of Contents

+ + + +
+

1. Scope

+ +

Do Not Track is designed to provide users with a simple mechanism to + express a preference to allow or limit online tracking. Complying + with the user's preference as described in this document includes limits on + the collection, retention and use of data collected as a third party + to user actions and the sharing of data not + permanently de-identified.

+ +

This specification is intended for compliance with expressed user + preferences via user agents that (1) can access + the general browsable Web; (2) have a user interface that satisfies the + requirements in Determining + User Preference in the [TPE] specification; and, (3) can implement + all of the [TPE] specification, including the mechanisms for + communicating a tracking status, and the user-granted exception + mechanism.

+ +

It is outside the scope of this specification to control short-term, + transient collection and use of data, so long as the data is not shared + with a third party and is not used to build a profile about a user or + otherwise alter an individual user’s experience outside the current network + interaction. For example, the contextual customization of ads shown as part + of the same network interaction is not restricted by a DNT:1 + signal.

+
+ +
+

2. Definitions

+ +
+

2.1 User

+ +

A user is a natural person who is making, or has made, use + of the Web.

+
+ +
+

2.2 User Agent

+ +

The term user agent refers to any of the various client + programs capable of initiating HTTP requests, including but not limited + to browsers, spiders (web-based robots), command-line tools, native + applications, and mobile apps [RFC7230].

+
+ +
+

2.3 Network Interaction

+ +

A network interaction is a single HTTP request and its + corresponding response(s): zero or more interim (1xx) responses and a + single final (2xx-5xx) response.

+
+ +
+

2.4 User Action

+ +

A user action is a deliberate action by the user, via + configuration, invocation, or selection, to initiate a network + interaction. Selection of a link, submission of a form, and reloading a + page are examples of user actions.

+
+ +
+

2.5 Party

+ +

A party is a natural person, a legal entity, or a set of + legal entities that share common owner(s), common controller(s), and a + group identity that is easily discoverable by a user. Common branding or + providing a list of affiliates that is available via a link from a + resource where a party describes DNT practices are examples of ways to + provide this discoverability.

+
+ +
+

2.6 Service Provider

+ +

Access to Web resources often involves multiple parties that might + process the data received in a network interaction. For example, domain + name services, network access points, content distribution networks, load + balancing services, security filters, cloud platforms, and + software-as-a-service providers might be a party to a given network + interaction because they are contracted by either the user or the + resource owner to provide the mechanisms for communication. Likewise, + additional parties might be engaged after a network interaction, such as + when services or contractors are used to perform specialized data + analysis or records retention.

+ +

For the data received in a given network interaction, a service + provider is considered to be the same party as its + contractee if the service provider:

+ +
    +
  1. processes the data on behalf of the contractee;
  2. + +
  3. ensures that the data is only retained, accessed, and used as + directed by the contractee;
  4. + +
  5. has no independent right to use the data other than in a + permanently de-identified form (e.g., for monitoring service + integrity, load balancing, capacity planning, or billing); and, +
  6. + +
  7. has a contract in place with the contractee which is consistent + with the above limitations.
  8. +
+
+ +
+

2.7 First Party

+ +

With respect to a given user action, a first party is a + party with which the user intends to interact, via one or more network + interactions, as a result of making that action. Merely hovering over, + muting, pausing, or closing a given piece of content does not constitute + a user's intent to interact with another party.

+ +

In some cases, a resource on the Web will be jointly controlled by two + or more distinct parties. Each of those parties is considered a first + party to a given user action if a user would reasonably expect to + communicate with all of them when accessing that resource. For example, + prominent co-branding on the resource might lead a user to expect that + multiple parties are responsible for the content or functionality.

+ +

Network interactions related to a given user action may not constitute + intentional interaction when, for example, the user is unaware or only + transiently informed of redirection or framed content.

+
+ +
+

2.8 Third Party

+ +

For any data collected as a result of one or more network interactions + resulting from a user's action, a third party is any party + other than that user, a first party for that user action, or a service + provider acting on behalf of either that user or that first party.

+
+ +
+

2.9 De-identification

+ +

Data is permanently de-identified when there exists a high + level of confidence that no human subject of the data can be identified, + directly or indirectly (e.g., via association with an identifier, user + agent, or device), by that data alone or in combination with other + retained or available information.

+ +
+

2.9.1 De-identification Considerations

This section is non-normative.

+ +

In this specification the term permanently de-identified is + used for data that has passed out of the scope of this specification + and can not, and will never, come back into scope. The organization + that performs the de-identification needs to be confident that the data + can never again identify the human subjects whose activity contributed + to the data. That confidence may result from ensuring or demonstrating + that it is no longer possible to:

+ +
    +
  • isolate some or all records which correspond to a device or + user;
  • + +
  • link two or more records (either from the same database or + different databases), concerning the same device or user;
  • + +
  • deduce, with significant probability, information about a device + or user.
  • +
+ +

Regardless of the de-identification approach, unique keys can be + used to correlate records within the de-identified dataset, provided + the keys do not exist and cannot be derived outside the de-identified + dataset and have no meaning outside the de-identified dataset (i.e. no + mapping table can exist that links the original identifiers to the keys + in the de-identified dataset).

+ +

In the case of records in such data that relate to a single user or + a small number of users, usage and/or distribution restrictions are + advisable; experience has shown that such records can, in fact, + sometimes be used to identify the user or users despite technical + measures taken to prevent re-identification. It is also a good practice + to disclose (e.g. in the privacy policy) the process by which + de-identification of these records is done, as this can both raise the + level of confidence in the process, and allow for feedback on the + process. The restrictions might include, for example:

+ +
    +
  • technical safeguards that prohibit re-identification of + de-identified data;
  • + +
  • business processes that specifically prohibit re-identification + of de-identified data;
  • + +
  • business processes that prevent inadvertent release of + de-identified data;
  • + +
  • administrative controls that limit access to de-identified + data.
  • +
+ +

Geolocation data (of a certain precision or over a period of time) + may itself identify otherwise de-identified data.

+
+
+ +
+

2.10 Tracking

+ +

Tracking is the collection of data regarding a particular + user's activity across multiple distinct contexts and the retention, use, + or sharing of data derived from that activity outside the context in + which it occurred. A context is a set of resources that are + controlled by the same party or jointly controlled by a set of + parties.

+
+ +
+

2.11 Collect, Use, Share

+ +

A party collects data received in a network interaction if + that data remains within the party’s control after the network + interaction is complete.

+ +

A party uses data if the party processes the data for any + purpose other than storage or merely forwarding it to another party.

+ +

A party shares data if it transfers or provides a copy of + data to any other party.

+
+
+ +
+

3. Server Compliance

+ +
+

3.1 Indicating Compliance and Non-Compliance

+ +

In order to indicate a party's compliance with a user's expressed + tracking preference as described in this specification for a given + resource, an origin server:

+ +
    +
  1. MUST conform to the origin server requirements of [TPE];
  2. + +
  3. MUST send a tracking status value other than ! (under + construction) or D (disregarding) for that resource; + and
  4. + +
  5. MUST send, in a tracking status representation applicable to that + resource, a compliance property that contains a reference to the + following URI: + +
    + http://www.w3.org/TR/2015/WD-tracking-compliance-20150714/ +
    +
  6. +
+ +

When a user sends a DNT:0 signal, the user is expressing + a preference to allow tracking. This specification places no restrictions + on collection or use of data from network interactions with + DNT:0 signals. Note, however, that a party might be limited + by its own statements to the user regarding the DNT:0 + setting. For more information, see Section 4. Consent.

+ +

A party to a given user action which receives a DNT:1 + signal and is tracking that action MUST indicate so to the user + agent. A party that is tracking a user with that user's consent to + override an expressed DNT:1 preference MUST indicate so with + the corresponding C or P + tracking status values. A party that is tracking a user for reasons + allowable under this specification (for example, for one of the permitted + uses described below) MUST use the T value. A party to a + given user action that is not engaged in tracking SHOULD use the + N value (a T value is also conformant but not + as informative).

+ +

A party to a given user action that disregards a DNT:1 + signal MUST indicate that non-compliance to the user agent, using the + response mechanism defined in the [TPE] specification. The party MUST + provide information in its privacy policy listing the specific reasons + for not honoring the user's expressed preference. The party's + representation MUST be clear and easily discoverable.

+ +

In the interest of transparency, especially where multiple reasons are + listed, a server might use the [TPE] qualifiers + or config + properties to indicate a particular reason for disregarding or steps to + address the issue. A user agent can parse this response to communicate + the reason to the user or direct the user to the relevant section of a + privacy policy. This document does not define specific qualifiers for + different reasons servers might have for disregarding signals.

+
+ +
+

3.2 First Party Compliance

+ +

With respect to a given user action, a first party to that action + which receives a DNT:1 signal MAY collect, retain and use + data received from those network interactions. This includes customizing + content, services and advertising with respect to those user actions.

+ + + +

A first party to a given user action MUST NOT share data about those + network interactions with third parties to that action who are prohibited + from collecting data from those network interactions under this + specification. Data about the interaction MAY be shared with service + providers acting on behalf of that first party.

+ +

Compliance rules in this section apply where a party determines that + it is a first party to a given user action — either because network + resources are intended only for use as a first party to a user action or + because the status is dynamically discerned. For cases where a party + later determines that data was unknowingly collected as a third party to + a user action, see Section 6. Unknowing Collection.

+ +

A first party to a given user action MAY elect to follow the rules + defined under this specification for third parties.

+
+ +
+

3.3 Third Party Compliance

+ +

When a third party to a given user action receives a + DNT:1 signal in a related network interaction, that party + MAY collect and use data about those network interactions when:

+ +
    +
  1. a user has explicitly granted consent, as described below (Section + 4. Consent); +
  2. + +
  3. data is collected for the set of permitted uses described below + (Section 3.3.2 Permitted Uses); +
  4. + +
  5. or, the data is permanently de-identified as defined in this + specification. +
  6. +
+ +

Other than under those enumerated conditions, that party:

+ +
    +
  1. MUST NOT collect data from this network interaction that would + result in data regarding this particular user being associated across + multiple contexts;
  2. + +
  3. MUST NOT retain, use, or share data from this particular user's + activity outside the context in which that activity occurred; and
  4. + +
  5. MUST NOT use data from network interactions with this particular + user in a different context. +
  6. +
+ + + +

Outside the permitted uses and explicitly-granted exceptions listed + below, a third party to a given user action MUST NOT collect, share, or + associate with related network interactions any identifiers that identify + a specific user, user agent, or device. For example, a third party that + does not require unique user identifiers for one of the permitted uses + MUST NOT place a unique identifier in cookies or other browser-based + local storage mechanisms.

+ +
+

3.3.1 General Requirements for Permitted Uses

+ +

Some collection and use of data by third parties to a given user + action is permitted, notwithstanding receipt of DNT:1 in a + network interaction, as enumerated below. Different permitted uses may + differ in their permitted items of data collection, retention times, + and consequences. In all cases, collection and use of data must be + reasonably necessary and proportionate to achieve the purpose for which + it is specifically permitted; unreasonable or disproportionate + collection, retention, or use are not “permitted uses”.

+ +
Note

The requirements in the following sub-sections apply to + a party that collects data for a permitted use and that would otherwise + be prohibited from collecting, retaining or using that data under the + third-party compliance requirements above. Where a first party to a + given user action, for example, collects some data for a purpose listed + among the permitted uses (e.g. security of network services), these + requirements do not apply.

+ +
+
3.3.1.1 No Secondary Uses
+ +

A party MUST NOT use data collected for permitted uses for + purposes other than the permitted uses for which each datum was + permitted to be collected.

+
+ +
+
3.3.1.2 Data Minimization, Retention and Transparency
+ +

Data collected by a party for permitted uses MUST be minimized to + the data reasonably necessary for such permitted uses. Such data MUST NOT be retained any longer than is proportionate to, and reasonably + necessary for, such permitted uses. A party MUST NOT rely on unique + identifiers if alternative solutions are reasonably available.

+ +

A party MUST publicly describe definite time periods for which + data collected for permitted uses are retained. The party MAY + enumerate different retention periods for different permitted uses. + Data MUST NOT be used for a permitted use once the data retention + period for that permitted use has expired. After there are no + remaining permitted uses for given data, the data MUST be deleted or + permanently de-identified.

+
+ +
+
3.3.1.3 No Personalization
+ +

A party that collects data for a permitted use MUST NOT use that + data to alter a specific user's online experience, except as + specifically permitted below.

+
+ +
+
3.3.1.4 Reasonable Security
+ +

A party that collects data for a permitted use MUST use reasonable + technical and organizational safeguards to prevent further processing + of data retained for permitted uses. While physical separation of + data maintained for permitted uses is not required, best practices + SHOULD be in place to ensure technical controls ensure access + limitations and information security.

+
+
+ +
+

3.3.2 Permitted Uses

+ +
+
3.3.2.1 Frequency Capping
+ +

Regardless of the tracking preference expressed, data MAY be + collected, retained and used to limit the number of times that a user + sees a particular advertisement, often called frequency + capping, as long as the data retained do not reveal the user’s + browsing history.

+
+ +
+
3.3.2.2 Financial Logging
+ +

Regardless of the tracking preference expressed, data MAY be + collected and used for billing and auditing related to the + current network interaction and concurrent transactions. This may + include counting ad impressions to unique visitors, verifying + positioning and quality of ad impressions and auditing compliance + with this specification and other standards.

+
+ +
+
3.3.2.3 Security
+ +

Regardless of the tracking preference expressed, data MAY be + collected and used to the extent reasonably necessary to detect + security incidents, protect the service against malicious, + deceptive, fraudulent, or illegal activity, and prosecute those + responsible for such activity, provided that such data is not used + for operational behavior beyond what is reasonably necessary to + protect the service or institute a graduated response.

+ +

When feasible, a graduated response to a detected security + incident is preferred over widespread data collection. In this + specification, a graduated response is a data minimization + methodology where actions taken are proportional to the problem or + risk being mitigated.

+ + +
+ +
+
3.3.2.4 Debugging
+ +

Regardless of the tracking preference expressed, data MAY be + collected, retained and used for debugging purposes to + identify and repair errors that impair existing intended + functionality.

+
+
+ +
+

3.3.3 Qualifiers for Permitted Uses

+ +

A party MAY indicate which of the listed permitted uses apply to + tracking of a user with the qualifiers + mechanism defined in the [TPE] document. While providing qualifiers + is OPTIONAL, a party that wishes to indicate particular permitted uses + MUST use the corresponding characters as indicated in the table + below.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
qualifierpermitted use
c + frequency capping +
f + financial logging +
s + security +
d + debugging +
+ +

A party MAY use multiple qualifiers to indicate that multiple + permitted uses of tracking might be ongoing and that each such use + conforms to any corresponding requirements. Where qualifiers are + present, a party MUST indicate all claimed permitted uses.

+ + +
+
+
+ +
+

4. Consent

+ +

A party MAY engage in practices otherwise proscribed by this + specification when the user has given explicit and informed consent. After + consent is received, it might be subsequently registered through the + User-Granted Exceptions API defined in the companion [TPE] document or + recorded out of band using a different technology. A party MUST + indicate when it is relying on out of band consent to override a Do + Not Track preference, as described in the companion [TPE] document.

+ + + +
+

4.1 Transfer of consent to another party

+ +

When a party requests consent from the user as described above, it + might include consent for sharing data with its service providers. This transitive + permission might apply even to those parties to which the user has + not separately granted consent to be tracked.

+ +

A party that transfers consent in this way MUST ensure that its + service providers acknowledge this + consent by use of the corresponding + tracking status value of C and a qualifier + of t ("transferred").

+ + +
+
+ +
+

5. Interaction with Existing User Privacy Controls

+ +

Multiple systems may be setting, sending, and receiving DNT and/or + opt-out signals at the same time. As a result, it will be important to + ensure industry and web browser vendors are on the same page with respect + to honoring user choices in circumstances where "mixed signals" may be + received.

+ +

As a general principle, more specific settings override less specific + settings, as where the specific consent in user-granted exceptions + overrides a general preference. If a party perceives a conflict between + settings, a party MAY seek clarification from the user or MAY honor the + more restrictive setting.

+
+ +
+

6. Unknowing Collection

+ +

If a party learns that it possesses data in violation of this + specification, it MUST, where reasonably feasible, delete or de-identify + that data at the earliest practical opportunity, even if it was previously + unaware of such information practices despite reasonable efforts to + understand its information practices.

+
+ + + +
+

A. Acknowledgements

+ +

This specification consists of input from many discussions within and + around the W3C Tracking Protection Working Group, along with written + contributions from: Haakon Flage Bratsberg (Opera Software), Amy Colando + (Microsoft), Rob van Eijk (Invited Expert), Roy T. Fielding (Adobe), Vinay + Goel (Adobe), Yianni Lagos (Future of Privacy Forum), Tom Lowenthal + (Mozilla), Ted Leung (The Walt Disney Company), Jonathan Mayer (Stanford), + Ninja Marnau (Invited Expert), Mike O'Neill (Baycloud Systems), Thomas + Roessler (W3C), Wendy Seltzer (W3C), Rob Sherman (Facebook), John M. + Simpson (Invited Expert), David Singer (Apple), Kevin G. Smith (Adobe), + Vincent Toubiana (Invited Expert), Rigo Wenning (W3C), and Shane Wiley + (Yahoo!). The co-chairs of the group have helped guide those discussions: + Justin Brookman (CDT), Carl Cargill (Adobe), + Aleecia M. McDonald (Stanford), Matthias Schunter (Intel), and Peter Swire + (Invited Expert).

+ +

Many thanks to Robin Berjon for ReSpec.

+
+ + +

B. References

B.1 Normative references

[RFC7230]
R. Fielding, Ed.; J. Reschke, Ed.. Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing. June 2014. Proposed Standard. URL: https://tools.ietf.org/html/rfc7230 +
[TPE]
Roy T. Fielding; David Singer. Tracking Preference Expression (DNT). 24 April 2014. W3C Last Call Working Draft. URL: http://www.w3.org/TR/tracking-dnt/ +
\ No newline at end of file diff --git a/test/docs/metadata/ttml-imsc1.html b/test/docs/metadata/ttml-imsc1.html new file mode 100644 index 000000000..b50571311 --- /dev/null +++ b/test/docs/metadata/ttml-imsc1.html @@ -0,0 +1,4075 @@ + + + + + + TTML Profiles for Internet Media Subtitles and Captions 1.0 (IMSC1) + + + + + + +

Abstract

+

This document specifies two profiles of [TTML1]: a text-only profile and an image-only profile. These profiles are + intended to be used across subtitle and caption delivery applications worldwide, thereby simplifying interoperability, + consistent rendering and conversion to other subtitling and captioning formats.

+ +

It is feasible to create documents that simultaneously conform to both [ttml10-sdp-us] and the text-only profile.

+ +

The document defines extensions to [TTML1], as well as incorporates extensions specified in [ST2052-1] and + [EBU-TT-D].

+ +

Both profiles are based on [SUBM].

+

Status of This Document

+ + + +

+ This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the W3C technical reports index at http://www.w3.org/TR/. +

+ + + +

+ This document was published by the Timed Text Working Group as a Proposed Recommendation. + + This document is intended to become a W3C Recommendation. + + + + + + + The W3C Membership and other interested parties are invited to review the document and + send comments to + public-tt@w3.org + (subscribe, + archives) + through 08 April 2016. Advisory Committee Representatives should consult their + WBS questionnaires. + Note that substantive technical comments were expected during the last Candidate Recommendation review + period that ended 28 February 2016. + +

+ + +

+ Please see the Working Group's implementation + report. +

+ + + +

+ The implementation report documents that there are at least 2 independent implementations for every feature defined + in this specification but not already present in [TTML1], thereby satisfying the + Candidate Recommendation exit criteria. +

+ +

A list of the substantive changes applied since the initial Working Draft is available.

+ + + +

+ Publication as a Proposed Recommendation does not imply endorsement by the W3C + Membership. This is a draft document and may be updated, replaced or obsoleted by other + documents at any time. It is inappropriate to cite this document as other than work in + progress. +

+ + + +

+ + This document was produced by + + a group + operating under the + 5 February 2004 W3C Patent + Policy. + + + + + W3C maintains a public list of any patent + disclosures + + made in connection with the deliverables of + + the group; that page also includes + + instructions for disclosing a patent. An individual who has actual knowledge of a patent + which the individual believes contains + Essential + Claim(s) must disclose the information in accordance with + section + 6 of the W3C Patent Policy. + + +

+ +

This document is governed by the 1 September 2015 W3C Process Document. +

+ + + + + + +
+ + + +
+

1. Scope

+ +

This document specifies two profiles of [TTML1]: a text-only profile and an image-only profile. These profiles are + intended for subtitle and caption delivery worldwide, including dialog language translation, content description, captions for + deaf and hard of hearing, etc.

+ +

The text profile is a syntactic superset of [ttml10-sdp-us], and a document can simultaneously conform to both [ttml10-sdp-us] and the text-only profile.

+ +

The document defines extensions to [TTML1], as well as incorporates extensions specified in [ST2052-1] and + [EBU-TT-D].

+
+ +
+

2. Documentation Conventions

+ +

This specification uses the same conventions as [TTML1] for the specification of parameter attributes, styling attributes and metadata elements. In particular, Section 2.3 of [TTML1] specifies conventions used in the XML representation of elements.

+ +

All content of this specification that is not explicitly marked as non-normative is considered to be normative. If a section or appendix header contains the expression "non-normative", then the entirety of the section or appendix is considered non-normative.

+ +

This specification uses Feature and Extension + designations as defined in Appendices D.1 and E.1 at [TTML1]:

+
    +
  • when making reference to content conformance, + these designations refer to the syntactic expression or the semantic + capability associated with each designated Feature or + Extension; and
  • +
  • when making reference to processor + conformance, these designations refer to processing + requirements associated with each designated Feature or + Extension.
  • +
+ +

If the name of an element referenced in this specification is not namespace qualified, then the TT namespace applies (see 6.3 Namespaces).

+ +
+ +
+

3. Terms and Definitions

+ +

Default Region. See Section 9.3.1 at [TTML1].

+ +

Document Instance. See Section 2.2 at [TTML1].

+ +

Extension. See Section 2.2 at [TTML1].

+ +

Feature. See Section 2.2 at [TTML1].

+ +

Intermediate Synchronic Document. See Section 9.3.2 at [TTML1].

+ +

Document Interchange Context. See Section 2.2 at [TTML1].

+ +

Document Processing Context. See Section 2.2 at [TTML1].

+ +

Processor. Either a Presentation processor or a Transformation processor.

+ +

Presentation processor. See Section 2.2 at [TTML1].

+ +

Transformation processor. See Section 2.2 at [TTML1].

+ +

Related Media Object. See Section 2.2 at [TTML1].

+ +

Related Video Object. A Related Media Object that consists of a sequence of image frames, each a rectangular array of pixels.

+ +

Text Alternative. As defined in [WCAG20].

+ +
+ +

4. Conformance

+

+ As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, + and notes in this specification are non-normative. Everything else in this specification is + normative. +

+

The key words MAY, SHALL, SHALL NOT, SHOULD, and SHOULD NOT are + to be interpreted as described in [RFC2119]. +

+ + +

A Document Instance that conforms to a profile defined herein:

+ +
    +
  • SHALL satisfy all normative provisions specified by the profile;
  • + +
  • MAY include any vocabulary, syntax or attribute value associated with a Feature or + Extension whose disposition is permitted in the profile;
  • + +
  • SHALL NOT include any vocabulary, syntax or attribute value associated with a Feature or Extension + whose disposition is prohibited in the profile.
  • + +
+ +
Note

A Document Instance, by definition, satisfies the requirements of Section 3.1 at [TTML1], + and hence a Document Instance that conforms to a profile defined herein is also a conforming TTML1 Document Instance.

+ +

A presentation processor that conforms to a profile defined in this specification:

+ +
    +
  • SHALL satisfy the Generic Processor Conformance requirements at Section 3.2.1 of [TTML1];
  • + +
  • SHALL satisfy all normative provisions specified by the profile; and
  • + +
  • SHALL implement presentation semantic support for every Feature and Extension designated as permitted by the profile, subject to + any additional constraints on each Feature and Extension as specified by the profile.
  • +
+ +

A transformation processor that conforms to a profile defined in this specification:

+ +
    +
  • SHALL satisfy the Generic Processor Conformance requirements at Section 3.2.1 of [TTML1];
  • + +
  • SHALL satisfy all normative provisions specified by the profile; and
  • + +
  • SHALL implement transformation semantic support for every Feature and Extension designated as permitted by the profile, subject to + any additional constraints on each Feature and Extension as specified by the profile.
  • +
+ +
Note

The use of the term presentation processor (transformation processor) within this specification + does not imply conformance to the DFXP Presentation Profile (DFXP Transformation Profile) specified in [TTML1]. In other + words, it is not considered an error for a presentation processor (transformation processor) to conform to a + profile defined in this specification without also conforming to the DFXP Presentation Profile (DFXP Transformation + Profile).

+ +
Note

This specification does not specify presentation processor or transformation processor behavior when processing or transforming a non-conformant Document Instance.

+ +
Note

The permitted and prohibited + dispositions do not refer to the specification of a + ttp:feature or ttp:extension element as being + permitted or prohibited within a ttp:profile element.

+ +
+ +
+

5. Profiles

+ +
+

5.1 General

+ +

Notwithstanding special cases, e.g. a Document Instance that contains no p, span, br element and no smpte:backgroundImage attribute, it is generally not possible to construct a Document Instance that conforms to the Text Profile and Image Profile simultaneously, and it is not possible to construct a Document Instance that results in the presentation of both text data and image data.

+ +

In applications that require subtitle/caption content in image form to be simultaneously available in text form, two + distinct Document Instances, one conforming to the Text Profile and the other conforming to the Image Profile, + SHOULD be offered. In addition, the Text Profile Document Instance SHOULD be associated with the Image Profile + Document Instance such that, when image content is encountered, assistive technologies have access to its corresponding text + form. The method by which this association is made is left to each application.

+ +
Note

The ittm:altText element specified in 6.7.4 ittm:altText also allows a text equivalent + string to be associated with an image, e.g. to support indexing of the content and also facilitate quality checking of the + document during authoring.

+ +

Annex D. WCAG Considerations specifically discusses this specification in the context of the [WCAG20] + guidelines.

+
+ +
+

5.2 Text Profile

+ +

The Text Profile consists of Sections 6. Common Constraints and 7. Text Profile Constraints.

+
+ +
+

5.3 Image Profile

+ +

The Image Profile consists of Sections 6. Common Constraints and 8. Image Profile Constraints.

+
+ + +
+

5.4 Profile Resolution Semantics

+ + +

For the purpose of content processing, the determination of the resolved + profile SHOULD take into account both the signaled profile, as defined + in 6.9 Profile Signaling, and profile metadata, as designated by either (or both) + the Document Interchange Context or (and) the Document Processing + Context, which MAY entail inspecting document content.

+ +

If the resolved profile is not a profile supported by the Processor + but is feasibly interoperable with the Text Profile, then the resolved + profile is the Text Profile; otherwise, if the resolved profile is not + a profile supported by the Processor but is feasibly interoperable with + the Image Profile, then the resolved profile is the Image Profile.

+ + +

If the resolved profile is a profile supported by the Processor, then + the Processor SHOULD process the Document Instance according to the + resolved profile. If the resolved profile is neither Text Profile nor + Image Profile, processing is outside the scope of this specification.

+ + +

If the resolved profile is undetermined or not supported by the + Processor, then the Processor SHOULD nevertheless process the Document + Instance using one of its supported profiles, with a preference for the + Text Profile over the Image Profile; otherwise, processing MAY be + aborted.

+ + +
+
+ +
+

6. Common Constraints

+ +
+

6.1 Document Encoding

+ +

A Document Instance SHALL use UTF-8 character encoding as specified in [UNICODE].

+
+ +
+

6.2 Foreign Element and Attributes

+ +

A Document Instance MAY contain elements and attributes that are neither specifically permitted nor forbidden by a + profile.

+ +

A transformation processor SHOULD preserve such elements or attributes whenever possible.

+ +
Note

Document Instances remain subject to the structural requirements of [TTML1].

+
+ +
+

6.3 Namespaces

+ +

The following namespaces (see [xml-names]) are used in this specification:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NamePrefixValueDefining Specification
XMLxmlhttp://www.w3.org/XML/1998/namespace[xml-names]
TTtthttp://www.w3.org/ns/ttml[TTML1]
TT Parameterttphttp://www.w3.org/ns/ttml#parameter[TTML1]
TT Stylingttshttp://www.w3.org/ns/ttml#styling[TTML1]
TT Featurenonehttp://www.w3.org/ns/ttml/feature/[TTML1]
SMPTE-TT Extensionsmptehttp://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt[ST2052-1]
EBU-TT Stylingebuttsurn:ebu:tt:style[EBU-TT-D]
EBU-TT Metadataebuttmurn:ebu:tt:metadata[EBU-TT-D]
IMSC 1.0 Stylingittshttp://www.w3.org/ns/ttml/profile/imsc1#stylingThis specification
IMSC 1.0 Parameterittphttp://www.w3.org/ns/ttml/profile/imsc1#parameterThis specification
IMSC 1.0 Metadataittmhttp://www.w3.org/ns/ttml/profile/imsc1#metadataThis specification
IMSC 1.0 Extensionnonehttp://www.w3.org/ns/ttml/profile/imsc1/extension/This specification
IMSC 1.0 Text Profile Designatornonehttp://www.w3.org/ns/ttml/profile/imsc1/textThis specification
IMSC 1.0 Image Profile Designatornonehttp://www.w3.org/ns/ttml/profile/imsc1/imageThis specification
+ +

The namespace prefix values defined above are for convenience and Document Instances MAY use any prefix value that + conforms to [xml-names].

+ +

The namespaces defined by this specification are mutable [namespaceState]; all undefined names in these namespaces are reserved for future standardization by the W3C.

+
+ +
+

6.4 Overflow

+ +

A Document Instance SHOULD be authored assuming strict clipping of content that falls out of region areas, regardless of + the computed value of tts:overflow for the region.

+ +
Note

As specified in [TTML1], tts:overflow has no effect on the extent of the region, and hence + the total normalized drawing area S(En) at 9.3 Paint Regions.

+
+ + + +
+

6.6 Synchronization

+ +

Each intermediate synchronic document of the Document Instance is intended to be displayed on a specific frame and + removed on a specific frame of the Related Video Object.

+ +

When mapping a media time expression M to a frame F of a Related Video Object, e.g. for the purpose of rendering a + Document Instance onto the Related Video Object, the presentation processor SHALL map M to the frame F with the + presentation time that is the closest to, but not less than, M.

+ + + +
Note

In a typical scenario, the same video program (the Related Video Object) will be used for Document Instance + authoring, delivery and user playback. The mapping from media time expression to Related Video Object above allows the author + to precisely associate subtitle video content with video frames, e.g. around scene transitions. In circumstances where the + video program is downsampled during delivery, the application can specify that, at playback, the Related Video Object be + considered the delivered video program upsampled to its original rate, thereby allowing subtitle content to be rendered at the + same temporal locations it was authored.

+ +
+ +
+

6.7 Extensions

+ +
+

6.7.1 ittp:aspectRatio

+ +

The ittp:aspectRatio attribute allows authorial control of the mapping of the root container of a Document Instance to each image frame of the Related Video Object.

+ +

If present, the ittp:aspectRatio attribute SHALL conform to the following syntax:

+ + + + + + + +
+
+
ittp:aspectRatio
+  : numerator denominator          // with int(numerator) != 0 and int(denominator) != 0
+                                   // where int(s) parses string s as a decimal integer.
+
+numerator | denominator
+  : <digit>+
+
+
+
+ +

The root container of a Document Instance SHALL be mapped to each image frame of the Related Video Object + according to the + following:

+ +
    +
  1. +

    If ittp:aspectRatio is present, the root container SHALL be mapped to a rectangular area within the + image frame such that:

    + +
      +
    1. the ratio of the width to the height of the rectangular area is equal to ittp:aspectRatio,
    2. + +
    3. the center of the rectangular area is collocated with the center of the image frame,
    4. + +
    5. the rectangular area is entirely within the image frame, and
    6. + +
    7. the rectangular area has a height or width equal to that of the image frame.
    8. +
    +
  2. + +
  3. +

    Otherwise, the root container of a Document Instance SHALL be mapped to the image frame in its + entirety.

    +
  4. +
+ +

An ittp:aspectRatio attribute is considered to be significant only when specified on the tt + element.

+ + + +
Note

+The ittp:aspectRatio parameter effectively defines the intended display aspect ratio (DAR) of the root container, while + the tts:extent style property on the root element effectively defines the intended storage aspect ratio (SAR) of the root container. +

+ +
Note
The mapping algorithm above allows the author to + precisely control caption/subtitle position relative to elements within each frame of the video program, e.g. + to match the position of actors. This mapping algorithm does not however specify the presentation of + either the video frame or root container on the ultimate display device. This presentation depends on many factors, including + user input, and can involve displaying only parts of the content. Authors are therefore encouraged to follow best practices + for the intended target applications. Below are selected examples: +
    +
  • A 16:9 video program is authored to ensure adequate presentation on 4:3 display devices using a center-cut. + Accordingly subtitle/captions are authored using ittp:aspectRatio="4 3", allowing the combination + to be displayed on both 4:3 and 16:9 display devices while preserving both caption/subtitles content and the relative position + of caption/subtitles with video elements. +
  • +
  • + A playback system zooms the content of example (a) to fill a 21:9 display, perhaps as instructed by the user. The system elects to scale + the root container to fit vertically within the display (maintaining its aspect ratio as authored), at the cost of losing relative + positioning between caption/subtitles and video elements. +
  • +
  • + The system described in (b) instead elects to map the root container to the video frame, maintaining relative positioning + between caption/subtitles and video elements but at the risk of clipping subtitles/captions. +
  • +
+
+ +
+
+

6.7.2 ittp:progressivelyDecodable

+ +

A progressively decodable Document Instance is structured to facilitate presentation before the document is + received in its entirety, and can be identified using the ittp:progressivelyDecodable attribute.

+ +

A progressively decodable Document Instance is a Document Instance that conforms to the following:

+ +
    +
  1. no attribute or element of the TTML timing vocabulary is present within the head element;
  2. + +
  3. given two intermediate synchronic documents A and B + of the Document Instance, with start times TA and TB, respectively, TA is not greater than + TB if A includes a p element that lexically precedes any p + element that B includes; +
  4. + +
  5. no attribute of the TTML timing vocabulary is present on a descendant element of p; and
  6. + +
  7. no element E1 explicitly references another element E2 where the opening tag of E2 is lexically subsequent to the opening tag of E1.
  8. +
+ +

If present, the ittp:progressivelyDecodable attribute SHALL conform to the following syntax:

+ + + + + + + +
+
+
ittp:progressivelyDecodable
+  : "true"
+  | "false"
+
+
+
+ +

An ittp:progressivelyDecodable attribute is considered to be significant only when specified on the + tt element.

+ +

If not specified, the value of ittp:progressivelyDecodable SHALL be considered to be equal to "false".

+ +

A Document Instance for which the computed value of ittp:progressivelyDecodable is "true" SHALL be a + progressively decodable Document Instance.

+ +

A Document Instance for which the computed value of ittp:progressivelyDecodable is "false" is neither + asserted to be a progressively decodable Document Instance nor asserted not to be a progressively decodable Document Instance.

+ +
Example 3
<tt
+  xmlns="http://www.w3.org/ns/ttml"
+  xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
+  xmlns:tts="http://www.w3.org/ns/ttml#styling"
+  xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
+  xmlns:ittp="http://www.w3.org/ns/ttml/profile/imsc1#parameter"
+  ittp:progressivelyDecodable="true"
+  ttp:profile="..."
+ >
+ ...
+</tt>
+ +
Note
+

[TTML1] specifies explicit referencing of elements identified using xml:id in the following + circumstances:

+ +
    +
  • an element in body referencing region elements. In this case, Requirement 4 above is + always satisfied.
  • + +
  • an element in body referencing style elements. In this case, Requirement 4 above is + always satisfied.
  • + +
  • a region element referencing style elements. In this case, Requirement 4 above is always + satisfied.
  • + +
  • a style element referencing other style elements. In this case, Requirement 4 provides an + optimization of style element ordering within the head element.
  • + +
  • a ttm:actor element referencing a ttm:agent element. In this case, Requirement 4 provides + optimization of metadata elements ordering within the document.
  • + +
  • a content element referencing ttm:agent elements using the ttm:agent attribute. In this + case, Requirement 4 provides optimization of metadata elements ordering within the document.
  • +
+
+
+ +
+

6.7.3 itts:forcedDisplay

+ +

itts:forcedDisplay can be used to hide content whose computed value of tts:visibility is "visible" when the processor has been configured to do so via the application parameter displayForcedOnlyMode.

+ +

If and only if the value of displayForcedOnlyMode is "true", a content element with an itts:forcedDisplay + computed value of "false" SHALL NOT produce any visible rendering, but still affect layout, regardless of the computed + value of tts:visibility.

+ +

The itts:forcedDisplay attribute SHALL conform to the following:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Values:false | true
Initial:false
Applies to:body, div, p, region, span
Inherited:yes
Percentages:N/A
Animatable:discrete
+ +

Annex C. Forced content (non-normative) illustrates the use of itts:forcedDisplay in an application in which a + single document contains both hard of hearing captions and translated foreign language subtitles, using + itts:forcedDisplay to display translation subtitles always, independently of whether the hard of hearing + captions are displayed or hidden.

+ +

The presentation processor SHALL accept an optional boolean parameter called displayForcedOnlyMode, + whose value MAY be set by a context external to the presentation processor. If not set, the value of + displayForcedOnlyMode SHALL be assumed to be equal to "false".

+ +

The algorithm for setting the displayForcedOnlyMode parameter based on the circumstances under which the + Document Instance is presented is left to the application.

+ +
Example 4
...
+<head>
+	...
+	<region xml:id="r1" tts:origin="10% 2%" tts:extent="80% 10%" tts:color="white" itts:forcedDisplay="true" tts:backgroundColor="black"/>
+	<region xml:id="r2" tts:origin="10% 80%" tts:extent="80% 88%" tts:color="white" tts:backgroundColor="black"/>
+	...
+</head>
+...
+<div>
+	 <p region="r1" begin="1s" end="6s">Lycée</p>
+
+	 <!-- the following will not appear if displayForcedOnlyMode='true' -->
+	 <p region="r2" begin="4s" end="6s">Nous étions inscrits au même lycée.</p>
+</div>
+...
+ + +
Note

As specified in [TTML1], the background of a region can be visible even if the computed value of tts:visibility equals + "hidden" for all active content within. The background of a region for which itts:forcedDisplay equals "true" can therefore remain visible even if itts:forcedDisplay equals "false" for all active + content elements within the region and displayForcedOnlyMode equals "true". Authors can avoid this situation, for instance, by ensuring that content + elements and the regions that they are flowed into always have the same value of itts:forcedDisplay.

+ +
Note

Although itts:forcedDisplay, like all the TTML style attributes, has no defined semantics on a + br content element, itts:forcedDisplay will apply to a br content element if it is + either defined on an ancestor content element of the br content element or it is applied to a region element + corresponding to a region that the br content element is being flowed into.

+ +
Note

It is expected that the functionality of itts:forcedDisplay will be mapped to a conditional + style construct in a future revision of this specification.

+ +
Note

The presentation semantics associated with itts:forcedDisplay are intended to be + compatible with those associated with the forcedDisplayMode attribute defined in [CFF].

+
+ +
+

6.7.4 ittm:altText

+ +

ittm:altText allows an author to provide a text string equivalent for an element, typically an image. This + text equivalent MAY be used to support indexing of the content and also facilitate quality checking of the document during + authoring.

+ +

The ittm:altText element SHALL conform to the following syntax:

+ + + + + + + +
+
+
<ittm:altText
+  xml:id = ID
+  xml:lang = string
+  xml:space = (default|preserve)
+  {any attribute not in the default namespace, any TT namespace or any IMSC 1.0 namespace}>
+  Content: #PCDATA
+</ittm:altText>
+
+
+
+ +

The ittm:altText element SHALL be a child of the metadata element.

+ +

8. Image Profile Constraints specifies the use of the ittm:altText element with images.

+ +
Example 5
...
+<div region="r1" begin="1s" end="6s" smpte:backgroundImage="1.png">
+  <metadata>
+  <ittm:altText>Nous étions inscrits au même lycée.</ittm:altText>
+  </metadata>
+</div>
+...
+ +
Note

In contrast to the common use of alt attributes in [HTML5], the ittm:altText + element content is not intended to be displayed in place of the element if the element is not loaded. The + ittm:altText element content can however be read and used by assistive technologies.

+
+ + + +
+ +
+

6.8 Region

+ +
+

6.8.1 Presented Region

+ +

A presented region is a temporally active region that satisfies the following conditions:

+ +
    +
  1. the computed value of tts:opacity is not equal to "0.0"; and
  2. + +
  3. the computed value of tts:display is not "none"; and
  4. + +
  5. the computed value of tts:visibility is not "hidden"; and
  6. + +
  7. either (a) content is selected into the region or (b) the computed value of tts:showBackground is equal + to "always" and the computed value of tts:backgroundColor has non-transparent alpha.
  8. +
+
+ +
+

6.8.2 Dimensions and Position

+ +

All regions SHALL NOT extend beyond the root container, i.e. the intersection of the sets of coordinates belonging to a + region and the sets of coordinates belonging to the root container is the + set of coordinates belonging to the region.

+ +

No two presented regions in a given intermediate synchronic document SHALL + overlap, i.e. the intersection of the sets of coordinates within each presented region is empty.

+
+ +
+

6.8.3 Maximum number

+ +

The number of presented regions in a given intermediate synchronic document SHALL NOT be greater than 4.

+
+
+ +
+

6.9 Profile Signaling

+ +

+ The ttp:profile attribute SHOULD be present on the tt element and equal to the designator of the IMSC1 profile to which the Document Instance conforms, and the ttp:profile element SHOULD NOT be present, unless:

+
    +
  • + the Document Instance also conforms to [EBU-TT-D], in which case the ttp:profile attribute + and the ttp:profile element SHOULD NOT be present, and instead the designator of the IMSC1 profile to which the Document Instance conforms and + the URI "urn:ebu:tt:distribution:2014-01" SHOULD each be carried in an ebuttm:conformsToStandard element as specified in + [EBU-TT-D]; or +
  • +
  • + the Document Instance also conforms to [ttml10-sdp-us], in which case the ttp:profile attribute SHOULD NOT be present. [ttml10-sdp-us] requires that the ttp:profile element be present and that its use attribute be set to a specified value. +
  • +
+ + +

+ The ttp:profile and ebuttm:conformsToStandard elements SHALL NOT signal conformance to both Image Profile and Text Profile in a given Document Instance. +

+ +
+ +
+

6.10 Hypothetical Render Model

+ +

It SHALL be possible to apply the Hypothetical Render Model specified in Section 9. Hypothetical Render Model to any sequence of consecutive intermediate synchronic documents without error as defined in Section 9.2 General.

+
+ +
+

6.11 Features and Extensions

+ +

See 4. Conformance for a definition of permitted and prohibited.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FeatureDispositionAdditional provision
Relative to the TT Feature namespace
#animationpermitted
#backgroundColor-blockpermitted
#backgroundColor-regionpermitted
#cellResolutionpermittedIf the Document Instance includes any length value that uses the c expression, + ttp:cellResolution SHOULD be present on the tt element.
#clockModeprohibited
#clockMode-gpsprohibited
#clockMode-localprohibited
#clockMode-utcprohibited
#corepermitted
#display-blockpermitted
#display-inlinepermitted
#display-regionpermitted
#displaypermitted
#dropModeprohibited
#dropMode-dropNTSCprohibited
#dropMode-dropPALprohibited
#dropMode-nonDropprohibited
#extent-rootpermittedIf the Document Instance includes any length value that uses the px expression, + tts:extent SHALL be present on the tt element.
#extentpermitted
#frameRatepermittedIf the Document Instance includes any clock time expression that uses the frames term or any offset +time expression that uses the f metric, the ttp:frameRate attribute SHALL +be present on the tt element.
#frameRateMultiplierpermitted
#layoutpermitted
#length-cellpermittedc units SHALL NOT be present outside of the value of ebutts:linePadding.
#length-integerpermitted
#length-negativeprohibited
#length-percentagepermitted
#length-pixelpermitted
#length-positivepermitted
#length-realpermitted
#lengthpermitted
#markerModeprohibited
#markerMode-continuousprohibited
#markerMode-discontinuousprohibited
#metadatapermitted
#opacitypermitted
#originpermitted
#overflowpermitted
#overflow-visiblepermitted
#pixelAspectRatioprohibited
#presentationpermittedSee constraints applied to #profile.
#profilepermitted + See 6.9 Profile Signaling. + +
#showBackgroundpermitted
#structurepermitted
#styling-chainedpermitted
#styling-inheritance-contentpermitted
#styling-inheritance-regionpermitted
#styling-inlinepermitted
#styling-nestedpermitted
#styling-referentialpermitted
#stylingpermitted
#subFrameRateprohibited
#tickRatepermittedttp:tickRate SHALL be present on the tt element if the + document contains any time expression that uses the t metric.
#timeBase-clockprohibited
#timeBase-mediapermitted

NOTE: [TTML1] specifies that the default timebase is "media" if + ttp:timeBase is not specified on tt.

#timeBase-smpteprohibited
#time-clock-with-framespermitted
#time-clockpermitted
#time-offset-with-framespermitted
#time-offset-with-tickspermitted
#time-offsetpermitted
#timeContainerpermitted
#timingpermitted +
  • All time expressions within a Document Instance SHOULD use the same syntax, either + clock-time or offset-time.
  • + +
  • For any content element that contains br elements or text nodes or a + smpte:backgroundImage attribute, the begin and end attributes SHOULD + be specified on the content element or at least one of its ancestors.
+
#transformationpermittedSee constraints at #profile.
#visibility-blockpermitted
#visibility-regionpermitted
#writingMode-horizontal-lrpermitted
#writingMode-horizontal-rlpermitted
#writingMode-horizontalpermitted
#zIndexpermitted
ExtensionDispositionProvisions
Relative to the IMSC 1.0 Extension namespace
#aspectRatiopermitted
#forcedDisplaypermitted
#progressivelyDecodablepermitted
#altTextpermitted
+ +
Note

As specified in [TTML1], a #time-offset-with-frames expression is translated to a media time + M according to M = 3600 · hours + 60 · minutes + seconds + (frames ÷ (ttp:frameRateMultiplier · + ttp:frameRate)).

+
+
+ +
+

7. Text Profile Constraints

+ +
+

7.1 Profile Designator

+ +

This profile is associated with the following profile designator:

+ + + + + + + + + + + + + + + + + +
Profile NameProfile Designator
IMSC 1.0 Texthttp://www.w3.org/ns/ttml/profile/imsc1/text
+ +
Note

As specified in 6.11 Features and Extensions, the presence of the ttp:profile attribute is + not required by this profile. The profile designator specified above is intended to be generally used to signal conformance + of a Document Instance to the profile. The details of such signaling depend on the application, and can, for instance, use + metadata structures out-of-band of the Document Instance.

+
+ + + +
+

7.3 Reference Fonts

+ +

The flow of text within a region depends on the dimensions and spacing (kerning) between individual glyphs. + The following allows, for instance, region extents to be set such that text flows without clipping.

+ +

When rendering codepoints matching one of the combinations of computed font family and codepoints listed in + A. Reference Fonts, a processor SHALL use a font that generates a glyph sequence whose dimension is substantially + identical to the glyph sequence that would have been generated by one of the specified reference fonts.

+ +
Note

Implementations can use fonts other than those specified in A. Reference Fonts. Two fonts + with equal metrics can have a different appearance, but flow identically.

+ +
+ +
+

7.4 Features and Extensions

+ +

See 4. Conformance for a definition of permitted and prohibited.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FeatureDispositionAdditional provisions
Relative to the TT Feature namespace
#backgroundColor-inlinepermitted
#backgroundColorpermitted
#bidipermitted
#contentpermitted
#colorpermitted

The initial value of tts:color SHALL be "white".

+

NOTE: This is consistent with [ST2052-1].

#directionpermitted
#displayAlignpermitted
#extent-regionpermittedThe tts:extent attribute SHALL be present on all region elements, where it + SHALL use either px units or "percentage" syntax.
+
#fontFamily-genericpermitted

In the absence of specific instructions on the choice of font families, and in + order to enhance reproducibility of line fitting, authors are encouraged to + use the monospaceSerif or proportionalSansSerif generic font families, + for which reference font metrics are defined at A. Reference Fonts.

+ +

If the computed value of tts:fontFamily is "default", then the used value of tts:fontFamily SHALL be "monospaceSerif".

+ +

NOTE: The term used value is defined in CSS 2.1, as normatively referenced by [TTML1].

+
#fontFamily-non-genericpermitted
#fontFamilypermitted
#fontSize-anamorphicprohibited
#fontSize-isomorphicpermitted
#fontSizeSee individual disposition of #fontSize-anamorphic and #fontSize-isomorphic.
#fontStyle-italicpermitted
#fontStyle-obliquepermitted
#fontStylepermitted
#fontWeight-boldpermitted
#fontWeightpermitted
#length-empermitted
#lineBreak-uax14The processor SHALL implement the #lineBreak-uax14 feature defined in the TT Feature namespace.
#lineHeightpermittedAs implementation of the "normal" value is not uniform at the time of this writing, tts:lineHeight SHOULD NOT be set to "normal" and SHOULD be explicitly specified such that the specified style set of each p element contains a tts:lineHeight property whose value is not assigned by initial value fallback.
#nested-divpermitted
#nested-spanpermitted
#originpermittedThe tts:origin attribute SHALL use px units or "percentage" representation, and SHALL NOT + use em units.
#padding-1permitted
#padding-2permitted
#padding-3permitted
#padding-4permitted
#paddingpermitted
#textAlign-absolutepermitted
#textAlign-relativepermitted
#textAlignpermitted
#textDecoration-overpermitted
#textDecoration-throughpermitted
#textDecoration-underpermitted
#textDecorationpermitted
#textOutline-blurredprohibited
#textOutline-unblurredpermitted
#textOutlinepermittedThe computed value of tts:textOutline on a span element + SHALL be 10% or less than the computed value of tts:fontSize on the same element.
#unicodeBidipermitted
#visibilitypermitted
#visibility-inlinepermitted
#wrapOptionpermitted
#writingModepermitted
#writingMode-verticalpermitted
ExtensionDispositionProvisions
Relative to the SMPTE-TT Extension Namespace
#imageprohibited
Relative to the IMSC 1.0 Extension namespace
#linePaddingpermitted + +

If used, the attribute ebutts:linePadding MAY be specified on elements region, body, + div and p in addition to style.

+ +

The processor:

+
    +
  • SHALL apply ebutts:linePadding to p only; and
  • +
  • SHALL treat ebutts:linePadding as inheritable.
  • +
+ +

NOTE: The ebutts:linePadding attribute only supports c length units.

+ +
#multiRowAlignpermitted +

If used, the attribute ebutts:multiRowAlign MAY be specified on elements region, body, + div and p in addition to style.

+ +

The processor:

+
    +
  • SHALL apply ebutts:multiRowAlign to p only; and
  • +
  • SHALL treat ebutts:multiRowAlign as inheritable.
  • +
+
+ +
Note

In contrast to this specification, [EBU-TT-D] specifies that the attributes ebutts:linePadding and ebutts:multiRowAlign are allowed only on the style element.

+ +
+
+ +
+

8. Image Profile Constraints

+ +
+

8.1 Profile Designator

+ +

This profile is associated with the following profile designator:

+ + + + + + + + + + + + + + + +
Profile NameProfile Designator
IMSC 1.0 Imagehttp://www.w3.org/ns/ttml/profile/imsc1/image
+ +
Note

As specified in 6.11 Features and Extensions, the presence of the ttp:profile attribute is + not required by this profile. The profile designator specified above is intended to be generally used to signal conformance + of a Document Instance to the profile. The details of such signaling depend on the application, and can, for instance, use + metadata structures out-of-band of the Document Instance.

+
+ +
+

8.2 Presented Image

+ +
+

8.2.1 Definition

+ +

A presented image is a div element with a smpte:backgroundImage attribute that flows into a presented region.

+
+ +
+

8.2.2 Constraints

+ +

In a given intermediate synchronic document, each presented region SHALL contain at most one div element, which SHALL be a presented image.

+ +
+ +
+

8.2.3 Intermediate Synchronic Document Construction

+ +

For the purposes of constructing an intermediate synchronic document, a div element with a smpte:backgroundImage attribute SHALL NOT be considered empty.

+ +
+
+ +
+

8.3 smpte:backgroundImage Constraints

+ +

If a smpte:backgroundImage attribute is applied to a div element:

+ +
    +
  • the width and height (in pixels) of the image source referenced by smpte:backgroundImage SHALL be equal + to the width and height (as specified by the tts:extent attribute using px units) of the region in which the + div element is presented;
  • + +
  • the div element SHOULD contain a metadata element containing an ittm:altText element that is a Text Alternative of the image referenced by the smpte:backgroundImage attribute; and
  • + +
  • The smpte:backgroundImage attribute SHALL reference a PNG datastream as specified in [PNG]. If a pHYs chunk is present, it SHALL indicate square pixels. Note that if no pixel aspect ratio is carried, the default of square pixels is assumed.
  • +
+ + +
Note

In [TTML1], tts:extent and tts:origin do not apply to div elements. In order to individually position multiple div elements, each div can be associated with a distinct region with the desired tts:extent and tts:origin.

+
+ +
+

8.4 Features and Extensions

+ +

See 4. Conformance for a definition of permitted and prohibited.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FeatureDispositionAdditional provisions
Relative to the TT Feature namespace
#backgroundColor-inlineprohibited
#backgroundColorSee individual disposition of #backgroundColor-inline, #backgroundColor-region and #backgroundColor-block.
#bidiSee individual disposition of #direction, #unicodeBidi and #writingMode-horizontal.
#colorprohibited
#contentpermittedThe p, span and br elements SHALL NOT be present. See Section 8.2.2 Constraints for constraints on div elements.
#directionprohibited
#displayAlignprohibited
#extent-regionpermittedThe tts:extent attribute SHALL be present on all region elements, where it + SHALL use px units.
#fontFamilyprohibited
#fontFamily-genericprohibited
#fontFamily-non-genericprohibited
#fontSizeprohibited
#fontSize-anamorphicprohibited
#fontSize-isomorphicprohibited
#fontStyleprohibited
#fontStyle-italicprohibited
#fontStyle-obliqueprohibited
#fontWeightprohibited
#fontWeight-boldprohibited
#length-emprohibited
#lineBreak-uax14No processor requirement is specified.
#lineHeightprohibited
#nested-divprohibited
#nested-spanprohibited

NOTE: The prohibition of span elements by this profile implies the prohibition of this feature.

#paddingprohibited
#padding-1prohibited
#padding-2prohibited
#padding-3prohibited
#padding-4prohibited
#textAlignprohibited
#textAlign-absoluteprohibited
#textAlign-relativeprohibited
#textDecorationprohibited
#textDecoration-overprohibited
#textDecoration-throughprohibited
#textDecoration-underprohibited
#textOutlineprohibited
#textOutline-blurredprohibited
#textOutline-unblurredprohibited
#unicodeBidiprohibited
#visibilitySee individual disposition of #visibility-inline, + #visibility-region and #visibility-block.
#visibility-inlineprohibited
#wrapOptionprohibited
#writingModeSee individual disposition of #writingMode-vertical and + #writingMode-horizontal.
#writingMode-verticalprohibited
ExtensionDispositionProvisions
Relative to the SMPTE-TT Extension namespace
#imagepermitted
    +
  • smpte:backgroundImage MAY be used according to 8.3 smpte:backgroundImage Constraints with the semantics of the attribute defined by Section 5.5.2 of [ST2052-1].
  • +
  • smpte:backgroundImageHorizontal and smpte:backgroundImageVertical SHALL NOT be used.
  • +
  • smpte:image SHALL NOT be used.
+
+
+ +
+

9. Hypothetical Render Model

+ +
+ +

9.1 Overview (non-normative)

+ +

This Section specifies the Hypothetical Render Model illustrated in Fig. 1 + Hypothetical Render Model + .

+ +

The purpose of the model is to limit Document Instance complexity. It is not intended as a specification of the + processing requirements for implementations. For instance, while the model defines a glyph buffer for the purpose of + limiting the number of glyphs displayed at any given point in time, it neither requires the implementation of such a + buffer, nor models the sub-pixel character positioning and anti-aliased glyph rendering that can be used to produce text + output.

+ +
+ Hypothetical Render Model + +
Fig. 1 + Hypothetical Render Model +
+
+ +

The model operates on successive intermediate synchronic documents + obtained from an input Document Instance, and uses a simple double buffering model: while an intermediate synchronic + document En is being painted into Presentation Buffer Pn (the "front buffer" of the model), the + previous intermediate synchronic document En-1 is available for display in Presentation Buffer + Pn-1 (the "back buffer" of the model).

+ +

The model specifies a (hypothetical) time required for completely painting an intermediate synchronic document + as a proxy for complexity. Painting includes drawing region backgrounds, rendering and copying glyphs, and decoding and + copying images. Complexity is then limited by requiring that painting of intermediate synchronic document + En completes before the end of intermediate synchronic document En-1.

+ +

Whenever applicable, constraints are specified relative to root container dimensions, allowing subtitle sequences to be + authored independently of Related Video Object resolution.

+ +

To enable scenarios where the same glyphs are used in multiple successive intermediate synchronic documents, e.g. to convey a CEA-608/708-style roll-up (see + [CEA-608] and [CEA-708]), the Glyph Buffers Gn and Gn-1 store rendered glyphs across intermediate synchronic documents, allowing glyphs to be copied into the + Presentation Buffer instead of rendered, a more costly operation.

+ +

Similarly, Decoded Image Buffers Dn and Dn-1 store decoded images across intermediate synchronic documents, allowing images to be copied into the Presentation Buffer instead of decoded.

+
+ +
+

9.2 General

+ +

The Presentation Compositor SHALL render in Presentation Buffer Pn each successive intermediate synchronic + document En using the following steps in order:

+ +
    +
  1. clear the pixels, except for the first intermediate synchronic document E0 for which the pixels + of P0 SHALL be assumed to have been cleared; +
  2. + +
  3. paint, according to stacking order, all background pixels for each region;
  4. + +
  5. paint all pixels for background colors associated with text or image subtitle content; and
  6. + +
  7. paint the text or image subtitle content.
  8. +
+ +

The Presentation Compositor SHALL start rendering En:

+ +
    +
  • at the presentation time of E0 minus Initial Painting Delay (IPD), if n = 0; or
  • + +
  • at the presentation time of En-1, if n > 0.
  • +
+ +

The duration DUR(En) for painting an intermediate synchronic document En in the + Presentation Buffer Pn SHALL be:

+ +

DUR(En) = S(En) / BDraw + DURT(En) + DURI(En)

+ +

where

+ + + +

The contents of the Presentation Buffer Pn SHALL be transferred instantaneously to Presentation Buffer + Pn-1 at the presentation time of intermediate synchronic document En, making the latter + available for display.

+ +
Note

It is possible for the contents of Presentation Buffer Pn-1 to never be displayed. This can + happen if Presentation Buffer Pn is copied twice to Presentation Buffer Pn-1 between two consecutive + video frame boundaries of the Related Video Object.

+ +

It SHALL be an error for the Presentation Compositor to fail to complete painting pixels for En before the + presentation time of En.

+ +

Unless specified otherwise, the following table SHALL specify values for IPD and BDraw.

+ + + + + + + + + + + + + + + + + + + + + + + +
ParameterInitial value
Initial Painting Delay (IPD)1 s
Normalized background drawing performance factor (BDraw)12 s-1
+ +
Note

BDraw effectively sets a limit on filling regions - for example, assuming that the root container is + ultimately rendered at 1920×1080 resolution, a BDraw of 12 s-1 would correspond to a fill rate of + 1920×1080×12/s=23.7×2^20 pixels s-1.

+ +
Note

IPD effectively sets a limit on the complexity of any given intermediate synchronic document.

+
+ +
+

9.3 Paint Regions

+ +

The total normalized drawing area S(En) for intermediate synchronic document En SHALL + be

+ +

S(En) = CLEAR(En) + PAINT(En )

+ +

where CLEAR(E0) = 0 and CLEAR(En | n > 0) = 1, i.e. the root container in its entirety.

+ +
Note

To ensure consistency of the Presentation Buffer, a new intermediate synchronic document requires + clearing of the root container.

+ +

PAINT(En) SHALL be the normalized area to be painted for all regions that are used in intermediate synchronic + document En according to:

+ +

PAINT(En) = ∑Ri∈Rp NSIZE(Ri) ∙ NBG(Ri)

+ +

where Rp SHALL be the set of presented regions in the intermediate synchronic + document En.

+ +

NSIZE(Ri) SHALL be given by:

+ +

NSIZE(Ri) = (width of Ri ∙ height of Ri ) ÷ (root container height ∙ root + container width)

+ + + +

NBG(Ri) SHALL be the total number of tts:backgroundColor attributes associated with the given + region Ri in the intermediate synchronic document. A tts:backgroundColor attribute is + associated with a region when it is explicitly specified (either as an attribute in the element, or by reference to a + declared style) in the following circumstances:

+ +
    +
  • it is specified on the region layout element that defines the region; or
  • + +
  • it is specified on a div, p, span or br content element that is + to be flowed into the region for presentation in the intermediate synchronic document (see [TTML1] for more + details on when a content element is flowed into a region); or +
  • + +
  • it is specified on a set animation element that is to be applied to content elements that are to be + flowed into the region for presentation in the intermediate synchronic document (see [TTML1] for more details + on when a set animation element is applied to content elements). +
  • +
+ +

Even if a specified tts:backgroundColor is the same as specified on the nearest ancestor content element or + animation element, specifying any tts:backgroundColor SHALL require an additional fill operation for all + region pixels.

+
+ +
+

9.4 Paint Images

+ +

The Presentation Compositor SHALL paint into the Presentation Buffer Pn all visible pixels of presented + images of intermediate synchronic document En.

+ +

For each presented image, the Presentation Compositor SHALL either:

+ +
    +
  • if an identical image is present in Decoded Image Buffer Dn, copy the image from Decoded Image Buffer + Dn to the Presentation Buffer Pn using the Image Copier; or
  • + +
  • if an identical image is present in Decoded Image Buffer Dn-1, i.e. an identical image was present in + intermediate synchronic document En-1, copy using the Image Copier the image from Decoded Image Buffer + Dn-1 to both the Decoded Image Buffer Dn and the Presentation Buffer Pn; or +
  • + +
  • otherwise, decode the image using the Image Decoder into the Presentation Buffer Pn and Decoded + Image Buffer Dn.
  • +
+ +

Two images SHALL be identical if and only if they reference the same encoded image source.

+ +

The duration DURI(En) for painting images of an intermediate synchronic document + En in the Presentation Buffer SHALL be as follows:

+ +

DURI(En) = ∑Ii ∈ Ic NRGA(Ii) / ICpy + + ∑Ij ∈ Id NSIZ(Ij) / IDec

+ +

where

+ + + +

NRGA(Ii) is the Normalized Image Area of presented image Ii and SHALL be equal to:

+ +

NRGA(Ii)= (width of Ii ∙ height of Ii ) ÷ ( root container height ∙ root + container width )

+ +

NSIZ(Ii) SHALL be the number of pixels of presented image Ii.

+ +

The contents of the Decoded Image Buffer Dn SHALL be transferred instantaneously to Decoded Image Buffer + Dn-1 at the presentation time of intermediate synchronic document En.

+ +

The total size occupied by images stored in Decoded Image Buffers Dn or Dn-1 SHALL be the sum of + their Normalized Image Area.

+ +

The size of Decoded Image Buffers Dn or Dn-1 SHALL be the Normalized Decoded Image Buffer Size + (NDIBS).

+ +

Unless specified otherwise, the following table SHALL specify ICpy, IDec, and NDIBS.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterInitial value
Normalized image copy performance factor (ICpy)6
Image Decoding rate (IDec)1 × 2^20 pixels s-1
Normalized Decoded Image Buffer Size (NDIBS)0.9885
+
+ +
+

9.5 Paint Text

+ +

In the context of this section, a glyph is a tuple consisting of (i) one character and (ii) the computed values of the following + style properties:

+ +
    +
  • tts:color
  • + +
  • tts:fontFamily
  • + +
  • tts:fontSize
  • + +
  • tts:fontStyle
  • + +
  • tts:fontWeight
  • + +
  • tts:textDecoration
  • + +
  • tts:textOutline
  • +
+ + +
Note

While one-to-one mapping between characters and typographical glyphs is generally the rule in some scripts, + e.g. latin script, it is the exception in others. For instance, in arabic script, a character can + yield multiple glyphs depending on its position in a word. The Hypothetical Render Model + always assumes a one-to-one mapping, but reduces the performance of the glyph buffer for scripts where one-to-one mapping + is not the general rule (see GCpy below).

+ +

For each glyph associated with a character in a presented region of intermediate synchronic document En, + the Presentation Compositor SHALL:

+ +
    +
  • if an identical glyph is present in Glyph Buffer Gn, copy the glyph from Glyph Buffer Gn to the + Presentation Buffer Pn using the Glyph Copier; or
  • + +
  • if an identical glyph is present in Glyph Buffer Gn-1, i.e. an identical glyph was present in intermediate + synchronic document En-1, copy using the Glyph Copier the glyph from Glyph Buffer Gn-1 to both the + Glyph Buffer Gn and the Presentation Buffer Pn; or
  • + +
  • otherwise render using the Glyph Renderer the glyph into the Presentation Buffer Pn and Glyph Buffer + Gn.
  • +
+ +
+ Example of Presentation Compositor Behavior for Text Rendering + +
Fig. 2 + Example of Presentation Compositor Behavior for Text Rendering +
+
+ +

The duration DURT(En) for rendering the text of an intermediate synchronic document + En in the Presentation Buffer is as follows:

+ +

DURT(En) = ∑gi ∈ Γr NRGA(gi) / Ren(gi) + + ∑gj ∈ Γc NRGA(gj) / GCpy

+ +

where

+ + + +

The Normalized Rendered Glyph Area NRGA(gi) of a glyph gi SHALL be equal to:

+ +

NRGA(gi) = (fontSize of gi as percentage of root container height)2

+ +
Note

NRGA(Gi) does not take into account decorations (e.g. underline), effects (e.g. + outline) or actual typographical glyph aspect ratio. An implementation can determine actual buffer size needs based on worst-case + glyph size complexity.

+ +

The contents of the Glyph Buffer Gn SHALL be copied instantaneously to Glyph Buffer Gn-1 at the + presentation time of intermediate synchronic document En.

+ +

It SHALL be an error for the sum of NRGA(gi) over all glyphs in Glyph Buffer Gn to be larger + than the Normalized Glyph Buffer Size (NGBS).

+ +

Unless specified otherwise, the following table specifies values of GCpy, Ren and NGBS.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Normalized glyph copy performance factor (GCpy)
Script property (see Standard Annex #24 at [UNICODE]) for the + character of giGCpy
latin, greek, cyrillic, hebrew or common12
any other value3
Text rendering performance factor Ren(Gi)
Block property (see [UNICODE]) for the character of giRen(Gi)
CJK Unified Ideograph0.6
any other value1.2
Normalized Glyph Buffer Size (NGBS)
1
+ +
Note

The choice of font by the presentation processor can increase rendering complexity. + For instance, a cursive font can generally result in a given character yielding different typographical glyphs depending + on context, even if latin script is used.

+ + + + + + +
+
+ + +
+

A. Reference Fonts

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Computed Font FamilyCode PointsReference Font
monospaceSerif + All code points specified in B. Recommended Character Sets + + Courier New + or + Liberation Mono +
proportionalSansSerif + All code points specified in B. Recommended Character Sets, excluding the code points + defined for Hebrew and Arabic scripts. + + Arial or Helvetica or Liberation Sans +
+ +
+ + + + +
+

C. Forced content (non-normative)

+ +

Fig. 3 + Illustration of the use of itts:forcedDisplay + below illustrates the use of forced content, i.e. itts:forcedDisplay and + displayForcedOnlyMode. The content with itts:forcedDisplay="true" is the French translation of the + "High School" sign. The content with itts:forcedDisplay="false" are French subtitles capturing a voiceover.

+ +
+ Illustration of the use of itts:forcedDisplay + +
Fig. 3 + Illustration of the use of itts:forcedDisplay +
+
+ +

When the user selects French as the playback language but does not select French subtitles, + displayForcedOnlyMode is set to "true", causing the display of the sign translation, which is useful to any French + speaker, but hiding the voiceover subtitles as the voiceover is heard in French.

+ +

If the user selects French as the playback language and also selects French subtitles, e.g. if the user is hard-of-hearing, + displayForcedOnlyMode is set to "false", causing the display of both the sign translation and the voiceover + subtitles.

+ +

The algorithm for setting the displayForcedOnlyMode parameter and selecting the appropriate combination of + subtitle and audio tracks depends on the application.

+
+ +
+

D. WCAG Considerations

+ +

In order to meet the guidelines in [WCAG20], the following considerations apply.

+ +

Guideline 1.1 of [WCAG20] recommends that an implementation provide Text Alternatives for all non-text content. In the + context of this specification, this Text Alternative is intended primarily to support users of the subtitles who cannot see + images. Since the images of an Image Profile Document Instance usually represent subtitle or caption text, the + guidelines for authoring text equivalent strings given at Images of text of [HTML5] are + appropriate.

+ +

Thus, for each subtitle in an Image Profile Document Instance, a text equivalent content in a Text Profile + Document Instance SHOULD be written so that it conveys all essential content and fulfills the same function as the + corresponding subtitle image. In the context of subtitling and captioning, this content will be (as a minimum) the verbatim + equivalent of the image without précis or summarization. However, the author MAY include extra information to the text + equivalent string in cases where styling is applied to the text image with a deliberate connotation, as a functional + replacement for the applied style.

+ +

For instance, in subtitling and captioning, italics can be used to indicate an off screen speaker context (for example a + voice from a radio). An author can choose to include this functional information in the text equivalent; for example, by + including the word "Radio: " before the image equivalent text. Note that images in an Image Profile + Document Instance that are intended for use as captions, i.e. intended for a hard of hearing audience, might already + include this functional information in the rendered text.

+ +

Guideline 1.1 of [WCAG20] also recommends that accessible Text Alternatives must be "programmatically determinable." This + means that the text must be able to be read and used by the assistive technologies (and the accessibility features in browsers) + that people with disabilities use. It also means that the user must be able to use their assistive technology to find the + alternative text (that they can use) when they land on the non-text content (that they can't use).

+
+ +
+

E. Sample Document Instance (non-normative)

+ +

The following sample Document Instances conform to the Text Profile and Image Profile, respectively. These samples are for illustration only, and are neither intended to capture current or future practice, nor exercise all normative prose contained in this specification.

+ +
Example 10
<?xml version="1.0" encoding="UTF-8"?>
+<tt xml:lang="en"
+    xmlns="http://www.w3.org/ns/ttml"
+    xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
+    xmlns:tts="http://www.w3.org/ns/ttml#styling"
+    xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
+    xmlns:ittp="http://www.w3.org/ns/ttml/profile/imsc1#parameter"
+    ittp:aspectRatio="4 3"
+    ttp:profile="http://www.w3.org/ns/ttml/profile/imsc1/text">
+
+    <head>
+        <layout>
+            <region xml:id="area1" tts:origin="10% 10%" tts:extent="80% 10%" tts:backgroundColor="black" tts:displayAlign="center" tts:color="red"/>
+        </layout>
+    </head>
+    <body>
+        <div>
+            <p region="area1" begin="0s" end="6s">Lorem ipsum dolor sit amet.</p>
+        </div>
+    </body>
+</tt>
+ + +
Example 11
<?xml version="1.0" encoding="UTF-8"?>
+<tt xml:lang="fr"
+    xmlns="http://www.w3.org/ns/ttml"
+    xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
+    xmlns:tts="http://www.w3.org/ns/ttml#styling"
+    xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
+    xmlns:smpte="http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt"
+    xmlns:itts="http://www.w3.org/ns/ttml/profile/imsc1#styling"
+    tts:extent="640px 480px"
+    ttp:frameRate="25"
+    ttp:profile="http://www.w3.org/ns/ttml/profile/imsc1/image">
+
+    <head>
+        <layout>
+            <region xml:id="region1" tts:origin="120px 410px" tts:extent="240px 40px" tts:showBackground="whenActive"/>
+            <region xml:id="region2" tts:origin="120px 20px" tts:extent="240px 40px" tts:showBackground="whenActive"/>
+        </layout>
+    </head>
+    <body>
+        <div region="region1" begin="00:00:01:00" end="00:00:02:00" smpte:backgroundImage="1.png"/>
+        <div region="region1" begin="00:00:03:20" end="00:00:04:12" smpte:backgroundImage="2.png"/>
+        <div region="region2" itts:forcedDisplay="true" begin="00:00:03:20" end="00:00:04:12" smpte:backgroundImage="3.png"/>
+    </body>
+</tt>
+
+ +
+

F. Extensions

+ +
+

F.1 General

+ +

The following sections define extension designations, expressed as relative URIs (fragment identifiers) relative to the + IMSC 1.0 Extension Namespace base URI.

+
+ +
+

F.2 #progressivelyDecodable

+ +

A transformation processor supports the #progressivelyDecodable feature if it recognizes and is + capable of transforming values of the ittp:progressivelyDecodable attribute.

+ +

A presentation processor supports the #progressivelyDecodable feature if it implements presentation + semantic support for values of the ittp:progressivelyDecodable + attribute.

+
+ +
+

F.3 #aspectRatio

+ +

A transformation processor supports the #aspectRatio feature if it recognizes and is capable of + transforming values of the ittp:aspectRatio attribute.

+ +

A presentation processor supports the #aspectRatio feature if it implements presentation semantic + support for values of the ittp:aspectRatio attribute.

+
+ +
+

F.4 #forcedDisplay

+ +

A transformation processor supports the #forcedDisplay feature if it recognizes and is capable of + transforming values of the itts:forcedDisplay attribute.

+ +

A presentation processor supports the #forcedDisplay feature if it implements presentation semantic + support for values of the itts:forcedDisplay attribute.

+
+ +
+

F.5 #altText

+ +

A transformation processor supports the #altText feature if it recognizes and is capable of + transforming values of the ittm:altText element.

+ +

A presentation processor supports the #altText feature if it implements presentation semantic support + for values of the ittm:altText element.

+
+ +
+

F.6 #linePadding

+ +

A transformation processor supports the #linePadding feature if it recognizes and is capable of + transforming values of the ebutts:linePadding attribute specified in [EBU-TT-D].

+ +

A presentation processor supports the #linePadding feature if it implements presentation semantic + support for values of the ebutts:linePadding attribute specified in [EBU-TT-D].

+
+ +
+

F.7 #multiRowAlign

+ +

A transformation processor supports the #multiRowAlign feature if it recognizes and is capable of + transforming values of the ebutts:multiRowAlign attribute specified in [EBU-TT-D].

+ +

A presentation processor supports the #multiRowAlign feature if it implements presentation semantic + support for values of the ebutts:multiRowAlign attribute specified in [EBU-TT-D].

+
+
+ +
+

G. XML Schema Definitions (non-normative)

+ +

XML Schema definitions (see [xmlschema-1]) for extension vocabulary defined + by this specification are provided here for convenience.

+ +

These definitions are non-normative and are not sufficient to validate conformance of a Document Instance.

+ +

In any case where a definition specified by this appendix diverges from the prose of the specification, + then the latter takes precedence.

+ + +
+ + +
+

H. Extensibility Objectives (non-normative)

+ +

This section documents extensibility objectives for this specification.

+ +

This specification is intended to allow:

+
    +
  • other profiles of TTML and future revisions of this specification to specify support for documents and/or processors conforming to Text Profile or Image Profile, in addition to specifying additional extensions;
  • +
  • subject to the structural requirements of [TTML1], content from external namespaces to be present in a document that conforms to Text Profile or Image Profile (a) without affecting transformation or presentation, and (b) to be carried through by a transformation processor (see 6.2 Foreign Element and Attributes);
  • +
  • a document that conforms to Text Profile or Image Profile to be embedded in other XML documents.
  • +
+
+ +
+

I. Compatibility with other TTML-based specifications (non-normative)

+ +
+

I.1 Overview

+ +

This specification is designed to be compatible with [ST2052-1], [EBU-TT-D] and [ttml10-sdp-us]. + Specifically, it is possible to create a document that:

+ + +

This specification is also intended to allow straightforward conversion of a document that conforms to the text or image profiles of [CFF] to the Text Profile or Image Profile, respectively.

+ +
+ + +
+

I.2 EBU-TT-D

+ +

The Text Profile is a strict syntactic superset of [EBU-TT-D].

+ +

A document that conforms to [EBU-TT-D] therefore generally also conforms to the Text Profile, with a few exceptions, including:

+ + +

Note that the ttp:profile attribute is not allowed by [EBU-TT-D], and the ebuttm:conformsToStandard element is used instead to signal Text Profile, as specified in 6.9 Profile Signaling.

+ +

It is not possible for a document that conforms to [EBU-TT-D] to also conform to Image Profile, and vice-versa, notwithstanding the special case where the document also conforms to Text Profile as noted at 5. Profiles.

+ +

The following is an example of a document that conforms to both Text Profile and [EBU-TT-D]. Note the presence of two ebuttm:conformsToStandard elements, one of which equals the Text Profile designator:

+ +
Example 12
<?xml version="1.0" encoding="UTF-8"?>
+<tt xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xmlns="http://www.w3.org/ns/ttml" xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
+    xmlns:tts="http://www.w3.org/ns/ttml#styling" xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
+    xmlns:ebutts="urn:ebu:tt:style" xml:lang="en" ttp:timeBase="media" xmlns:ebuttm="urn:ebu:tt:metadata" >
+    <head>
+        <metadata>
+            <ebuttm:documentMetadata>
+                <ebuttm:conformsToStandard>urn:ebu:tt:distribution:2014-01</ebuttm:conformsToStandard>
+                <ebuttm:conformsToStandard>http://www.w3.org/ns/ttml/profile/imsc1/text</ebuttm:conformsToStandard>
+            </ebuttm:documentMetadata>
+        </metadata>
+        <styling>
+            <style xml:id="baseStyle" tts:color="#FFFFFF" tts:lineHeight="100%"/>
+            <style xml:id="blackBackground" tts:backgroundColor="#000000"/>
+            <style xml:id="greenBackground" tts:backgroundColor="#00FF00"/>
+            <style xml:id="startEnd" tts:textAlign="start" ebutts:multiRowAlign="end"/>
+            <style xml:id="centerStart" tts:textAlign="center" ebutts:multiRowAlign="start"/>
+        </styling>
+        <layout>
+            <region xml:id="area1" tts:origin="15% 10%" tts:extent="70% 20%" style="greenBackground" tts:displayAlign="center"/>
+            <region xml:id="area2" tts:origin="15% 70%" tts:extent="70% 20%" style="blackBackground" tts:displayAlign="center"/>
+        </layout>
+    </head>
+    <body>
+        <div style="baseStyle">
+            <p xml:id="s1" region="area1" style="startEnd" begin="00:00:01" end="00:00:09">
+                multiRowAlign="end"<br/>textAlign="start"
+            </p>
+            <p xml:id="s2" region="area2" style="centerStart" begin="00:00:01" end="00:00:09">
+                multiRowAlign="start"<br/>textAlign="center"
+            </p>
+        </div>
+    </body>
+</tt>
+ +
+ +
+

I.3 SDP-US

+ +

The Text Profile is a strict syntactic superset of [ttml10-sdp-us].

+ +

A document that conforms to [ttml10-sdp-us] therefore also generally conforms to the Text Profile, with a few exceptions, including:

+
    +
  • [ttml10-sdp-us] does not constrain document complexity using an HRM.
  • +
+ +

[ttml10-sdp-us] requires a specific value of the use + attribute of the ttp:profile. As a result, Text Profile is + not signaled using the ttp:profile attribute. Instead, as + specified in 5.4 Profile Resolution Semantics, the Text Profile can be + signaled by the Document Interchange Context and/or the Document + Processing Context. Alternatively, a processor can choose to process a + document as a Text Profile document if the ttp:profile + element signals [ttml10-sdp-us], since [ttml10-sdp-us] is feasibly + interoperable with Text Profile.

+ + + +

It is not possible for a document that conforms to [ttml10-sdp-us] to also conform to Image Profile, and vice-versa, notwithstanding the special case where the document also conforms to Text Profile as noted at 5. Profiles.

+ +

As an illustration, Example 3 at [ttml10-sdp-us] conforms to both Text Profile and [ttml10-sdp-us].

+ +
+ +
+

I.4 SMPTE-TT (SMPTE ST 2052-1)

+ +

[ST2052-1] specifies the use of the DFXP Full Profile (see Appendix F.3 at [TTML1]) supplemented by a number of extensions, including http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt#image.

+ +

This specification defines practical constraints on [ST2052-1], supplemented by a few extensions defined at F. Extensions. These constraints and extensions are intended to reflect industry practice.

+ +

As a result, particular care is required when creating a document intended to be processed according to both [ST2052-1] and Text Profile or Image Profile. In particular:

+
    + +
  • in contrast to Text Profile and Image Profile, [ST2052-1] allows documents to contain both smpte:backgroundImage attributes and any of p, span, or br elements;
  • + +
  • Image Profile allows only a subset of the http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt#image extension;
  • + +
  • [ST2052-1] does not support the #aspectRatio, #forcedDisplay, + #linePadding and #multiRowAlign extensions that impact presentation; and
  • + +
  • when the designator "http://www.smpte-ra.org/schemas/2052-1/2010/profiles/smpte-tt-full" is used as a value for the ttp:profile element or attribute (see Section 5.8 at [ST2052-1]), Text Profile or Image Profile is signaled by the Document Interchange Context and/or the Document Processing Context.
  • + +
+ + +

The following is an example of a document that conforms to both Text Profile and [ST2052-1]:

+ +
Example 13
<?xml version="1.0" encoding="UTF-8"?>
+<tt xml:lang="en" xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
+    xmlns:ttp="http://www.w3.org/ns/ttml#parameter" ttp:profile="http://www.smpte-ra.org/schemas/2052-1/2010/profiles/smpte-tt-full"
+    xmlns:tts="http://www.w3.org/ns/ttml#styling" ttp:frameRate="24">
+    <head>
+        <layout>
+            <region xml:id="area1" tts:origin="10% 70%" tts:extent="80% 20%" tts:showBackground="whenActive" tts:backgroundColor="red" tts:displayAlign="center" tts:color="white"/>
+        </layout>
+    </head>
+    <body tts:lineHeight="100%">
+        <div>
+            <p region="area1" begin="00:00:01.01" end="00:00:03">This should appear on frame 25.</p>
+            <p region="area1" begin="00:00:04" end="00:00:06">This should appear on frame 96.</p>
+            <p region="area1" begin="00:00:07.33" end="00:00:09">This should appear on frame 176.</p>
+        </div>
+    </body>
+</tt>
+ +
+ +
+

I.5 CFF-TT

+ +

This specification was derived from the text and image profiles + specified in Section 6 at [CFF], and is intended to be a superset in + terms of capabilities. Additional processing is however generally necessary to + convert a document from [CFF] to this specification. In particular:

+ +
    + +
  • the namespace of the progressivelyDecodable attribute is different;
  • + +
  • the forcedDisplayMode attribute in [CFF] is renamed to + forcedDisplay in this specification;
  • + +
  • the [CFF] HRM does not specify GCpy as a function of script;
  • + +
  • in [CFF], the attribute ttp:frameRate is not subject to the requirements specified at 6.11 Features and Extensions; and
  • + +
  • [CFF] requires the use of the ttp:profile element, whereas this + specification recommends the use of the ttp:profile attribute.
  • + +
+ +
+ +
+ + + +

J. References

J.1 Normative references

[CLDR]
Unicode Consortium. The Common Locale Data Repository Project +
[EBU-TT-D]
European Broadcasting Union (EBU). Tech 3380, EBU-TT-D Subtitling Distribution Format Version 1.0 +
[PNG]
Tom Lane. Portable Network Graphics (PNG) Specification (Second Edition). 10 November 2003. W3C Recommendation. URL: http://www.w3.org/TR/PNG +
[RFC2119]
S. Bradner. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 +
[ST2052-1]
SMPTE ST 2052-1, Timed Text Format (SMPTE-TT) URL: https://www.smpte.org/standards +
[TTML1]
Glenn Adams. Timed Text Markup Language 1 (TTML1) (Second Edition). 24 September 2013. W3C Recommendation. URL: http://www.w3.org/TR/ttml1/ +
[UNICODE]
The Unicode Standard. URL: http://www.unicode.org/versions/latest/ +
[WCAG20]
Ben Caldwell; Michael Cooper; Loretta Guarino Reid; Gregg Vanderheiden et al. Web Content Accessibility Guidelines (WCAG) 2.0. 11 December 2008. W3C Recommendation. URL: http://www.w3.org/TR/WCAG20/ +
[ttml10-sdp-us]
Glenn Adams; Monica Martin; Sean Hayes. TTML Simple Delivery Profile for Closed Captions (US). 5 February 2013. W3C Note. URL: http://www.w3.org/TR/ttml10-sdp-us/ +
[xml-names]
Tim Bray; Dave Hollander; Andrew Layman; Richard Tobin; Henry Thompson et al. Namespaces in XML 1.0 (Third Edition). 8 December 2009. W3C Recommendation. URL: http://www.w3.org/TR/xml-names +

J.2 Informative references

[CEA-608]
Line-21 Data Services, ANSI/CEA Standard. +
[CEA-708]
Digital Television (DTV) Closed Captioning, ANSI/CEA Standard. +
[CFF]
Digital Entertainment Content Ecosystem (DECE). Common File Format & Media Formats Specification (CFF) Version 2.2. +
[HTML5]
Ian Hickson; Robin Berjon; Steve Faulkner; Travis Leithead; Erika Doyle Navara; Edward O'Connor; Silvia Pfeiffer. HTML5. 28 October 2014. W3C Recommendation. URL: http://www.w3.org/TR/html5/ +
[SUBM]
World Wide Web Consortium (W3C). TTML Text and Image Profiles for Internet Media Subtitles and Captions (Member Submission, 07 June 2013) +
[namespaceState]
Norman Walsh. The Disposition of Names in an XML Namespace. 29 March 2006. W3C Working Draft. URL: http://www.w3.org/TR/namespaceState/ +
[xmlschema-1]
Henry Thompson; David Beech; Murray Maloney; Noah Mendelsohn et al. XML Schema Part 1: Structures Second Edition. 28 October 2004. W3C Recommendation. URL: http://www.w3.org/TR/xmlschema-1/ +
From 9842efc027fdc1dc11cee3f11c73c272b4b4482a Mon Sep 17 00:00:00 2001 From: tripu Date: Thu, 17 Mar 2016 02:58:12 +0900 Subject: [PATCH 09/23] Better deliverer rule: only within SOTD div --- lib/rules/metadata/deliverers.js | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/lib/rules/metadata/deliverers.js b/lib/rules/metadata/deliverers.js index 65b70b069..3089def87 100644 --- a/lib/rules/metadata/deliverers.js +++ b/lib/rules/metadata/deliverers.js @@ -16,20 +16,23 @@ exports.check = function(sr, done) { , found = {} ; - sr.$('a').each(function() { - const item = sr.$(this); - // if (item.text().toLowerCase().indexOf('group') > -1) console.log('---' + item.text() + '---'); - if (REGEX_GROUP.test(item.text())) { - const name = item.text().trim() - , url = item.attr('href') - ; - if (!found[util.normaliseURI(url)]) { - found[util.normaliseURI(url)] = true; - result.push({name: name, homepage: url}); + if (sr && sr.getSotDSection() && sr.getSotDSection().filter('p')) { + sr.getSotDSection().filter('p').find('a[href]').each(function() { + const item = sr.$(this); + if (REGEX_GROUP.test(item.text())) { + const name = item.text().trim() + , url = item.attr('href') + ; + if (!found[util.normaliseURI(url)]) { + found[util.normaliseURI(url)] = true; + result.push({name: name, homepage: url}); + } } - } - }); - - done({detectedDeliverers: result}); + }); + done({detectedDeliverers: result}); + } + else { + done(); + } }; From f7e7daa37ce1394fb62e1cbe2c92b7e53fca98aa Mon Sep 17 00:00:00 2001 From: tripu Date: Thu, 17 Mar 2016 03:01:06 +0900 Subject: [PATCH 10/23] Work on validator and API module --- lib/api.js | 44 +++----------------------------------------- lib/validator.js | 4 ++-- 2 files changed, 5 insertions(+), 43 deletions(-) diff --git a/lib/api.js b/lib/api.js index aa78faefe..51bac2c92 100644 --- a/lib/api.js +++ b/lib/api.js @@ -56,16 +56,11 @@ const parseRequest = function(req, res) { res.status(400).send('At 
least one of "url", "source", "file" or "document" must be specified.'); } else { - var done = false; v = new validator.Specberus handler = new Sink(function(data) { - console.dir(data); - // if (!done) res.status(500).send(data); - // done = true; + res.status(500).send(data); }, function(data) { - console.dir(data); - if (!done) res.status(200).send(v.metadata); - done = true; + res.status(200).send(v.meta); }) ; options.events = handler; @@ -98,40 +93,7 @@ const parseRequest = function(req, res) { }; const setUp = function(app) { - - app.post('/api/*', parseRequest); /* function(req, res) { - var v - , file - , profile - , handler - , options - ; - if ('/api/validate' === req.path) { - v = new validator.Specberus; - file = req.query.file; - profile = profiles[req.query.profile]; - handler = new Sink(function(data) { - console.log(data); - }, function(data) { - console.log(v.detectedProfile); - }); - options = {file: file, events: handler, profile: profile}; - v.validate(options); - } - else if ('/api/metadata' === req.path) { - v = new validator.Specberus; - file = req.query.file; - handler = new Sink(function(data) { - console.log(data); - }, function(data) { - console.log(v.detectedProfile); - }); - options = {file: file, events: handler, profile: profileMetadata}; - v.validate(options); - } - res.end(); - }); */ - + app.post('/api/*', parseRequest); }; exports.setUp = setUp; diff --git a/lib/validator.js b/lib/validator.js index 0db0b654f..c5043a1d7 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -43,7 +43,7 @@ Specberus.prototype.extractMetadata = function (options) { } self.config = {lang: 'en_GB'}; - self.metadata = {}; + self.meta = {}; var seenErrors = {}; self.sink.on("err", function (name) { seenErrors[name] = true; }); var doValidation = function (err, query) { @@ -58,7 +58,7 @@ Specberus.prototype.extractMetadata = function (options) { if (result) { // console.dir(result); for (var i in result) { - self.metadata[i] = result[i]; + self.meta[i] 
= result[i]; } } done++; From 525d7228a31019bbb346b904b9a2cf2f638b477a Mon Sep 17 00:00:00 2001 From: tripu Date: Thu, 17 Mar 2016 03:28:58 +0900 Subject: [PATCH 11/23] Fix 2 typos (detected by running the test suite) --- test/samples.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/samples.json b/test/samples.json index e3f8175d5..2e2d27377 100644 --- a/test/samples.json +++ b/test/samples.json @@ -38,7 +38,7 @@ , "profile": "WG-NOTE" , "deliverers": [ { - "name": "CVS on the Web Working Group" + "name": "CSV on the Web Working Group" , "homepage": "http://www.w3.org/2013/csvw/" } ] @@ -49,7 +49,7 @@ , "profile": "REC" , "deliverers": [ { - "name": "CVS on the Web Working Group" + "name": "CSV on the Web Working Group" , "homepage": "http://www.w3.org/2013/csvw" } ] From a979e489a0783c298c75ce15878a7f461127377f Mon Sep 17 00:00:00 2001 From: tripu Date: Thu, 17 Mar 2016 03:29:44 +0900 Subject: [PATCH 12/23] Fix tests for rules (metadata); comment API tests Add "chai" to devDependencies. --- package.json | 1 + test/api.js | 4 +- test/rules.js | 121 ++++++++++++++++++++++++++++++++------------------ 3 files changed, 80 insertions(+), 46 deletions(-) diff --git a/package.json b/package.json index c5c34cb1b..33a60cce7 100644 --- a/package.json +++ b/package.json @@ -22,6 +22,7 @@ "whacko": "0.19" }, "devDependencies": { + "chai": "3.5", "coveralls": "2.11", "expect.js": "0.3", "istanbul": "0.4", diff --git a/test/api.js b/test/api.js index b1913024b..5e92b594c 100644 --- a/test/api.js +++ b/test/api.js @@ -31,7 +31,7 @@ if (!process || !process.env || !process.env.SKIP_NETWORK) { describe('API', function() { - it('The endpoint should exist', function() { +/* it('The endpoint should exist', function() { // @TODO }); @@ -58,7 +58,7 @@ if (!process || !process.env || !process.env.SKIP_NETWORK) { // @TODO }; // @TODO; submit a few sample specs for validation; check results. 
- }); + }); */ }); diff --git a/test/rules.js b/test/rules.js index e14ff1c18..2ecf1bad8 100644 --- a/test/rules.js +++ b/test/rules.js @@ -4,15 +4,17 @@ // Settings: const DEBUG = false -, METADATA_PROFILE = 'profile' -, METADATA_DELIVERERS = 'deliverers' +, META_PROFILE = 'profile' +, META_DELIVERERS = 'deliverers' ; // Native packages: const pth = require('path'); // External packages: -const expect = require('expect.js'); +const expect = require('expect.js') +, chai = require('chai').expect +; // Internal packages: const validation = require('./validation') @@ -22,52 +24,74 @@ const validation = require('./validation') ; /** - * Assert that the profile detected in a spec is equal to the known profile. + * Compare two arrays of "deliverers" and check that they're equivalent. + * + * @param {Array} a1 - One array. + * @param {Array} a2 - The other array. + * @returns {Boolean} whether the two structures are really the same. + */ + +const equivalentDeliverers = function(a1, a2) { + if (a1 && a2 && a1.length === a2.length) { + var j + , found = 0; + for(var i = 0; i < a1.length; i ++) { + j = 0; + while (i === found && j < a2.length) { + if (a1[i].name === a2[j].name && a1[i].homepage === a2[j].homepage) { + found++; + } + else { + j++; + } + } + } + return (found === a1.length); + } else { + return false; + } +}; + +/** + * Assert that some metadata detected in a spec is equal to the expected value. * * @param {String} url - public URL of a spec. - * @param {String} profile - profile that should be detected. - * @param {Array} deliverers - set of deliverers that should be detected. + * @param {String} file - name of local file containing a spec (without path and withouth ".html" suffix). + * @param {String} type - metadata to check: {"META_PROFILE", "META_DELIVERERS"}. + * @param {Object} expectedValue - value that is expected to be found. 
*/ -const compareMetadata = function(url, type, expectedValue) { // profile, deliverers) { +const compareMetadata = function(url, file, type, expectedValue) { - const specberus = new validator.Specberus - , handler = new sink.Sink(console.log) // , console.log) + const specberus = new validator.Specberus() + , handler = new sink.Sink(function(data) { throw new Error(data); }) + , thisFile = file ? 'test/docs/metadata/' + file + '.html' : null ; - // handler.on('exception', function () {}); - // handler.on('done', function () {}); - const opts = {events: handler, url: url}; + const opts = {events: handler, url: url, file: thisFile}; - if (METADATA_PROFILE === type) { - it('Should detect a ' + expectedValue, function () { + if (META_PROFILE === type) { + it('Should detect a ' + expectedValue, function (done) { handler.on('end-all', function () { - // console.dir(specberus.metadata); - // expect(specberus.metadata).to.not.be(undefined); - // expect(specberus.metadata.detectedProfile).to.not.be(undefined); - expect(specberus.metadata.detectedProfile).to.equal(expectedValue); + chai(specberus).to.have.property('meta').to.have.property('detectedProfile').equal(expectedValue); + done(); }); specberus.extractMetadata(opts); }); } - else if (METADATA_DELIVERERS === type) { - it('Should find deliverers of sample spec', function () { + else if (META_DELIVERERS === type) { + it('Should find deliverers of ' + (url ? url : file), function (done) { handler.on('end-all', function () { - // console.dir(specberus.metadata); - // expect(specberus.metadata).to.not.be(undefined); - // expect(specberus.metadata.detectedDeliverers).to.not.be(undefined); - // expect(specberus.metadata.detectedDeliverers).to.be.an('array'); - expect(specberus.metadata.detectedDeliverers.length).to.equal(expectedValue.length); - // for(var i = 0; i < specberus.metadata.detectedDeliverers.length; i ++) { - // @TODO: compare all deliverers, one by one. 
- // } - // done(); + chai(specberus).to.have.property('meta').to.have.property('detectedDeliverers'); + chai(specberus.meta.detectedDeliverers).to.satisfy(function(found) { + return equivalentDeliverers(found, expectedValue); + }); + done(); }); specberus.extractMetadata(opts); }); - - } + }; describe('Basics', function() { @@ -76,29 +100,38 @@ describe('Basics', function() { describe('Method "extractMetadata"', function() { - // it('Should exist and be a function'), function() { - // expect(specberus.extractMetadata).to.be.a('function'); - // }; + it('Should exist and be a function', function(done) { + chai(specberus).to.have.property('extractMetadata').that.is.a('function'); + done(); + }); - if (!process || !process.env || !process.env.SKIP_NETWORK) { + if (!process || !process.env || (process.env.TRAVIS !== 'true' && !process.env.SKIP_NETWORK)) { for(var i in samples) { - compareMetadata(samples[i].url, METADATA_PROFILE, samples[i].profile); + compareMetadata(samples[i].url, null, META_PROFILE, samples[i].profile); + } + for(var i in samples) { + compareMetadata(samples[i].url, null, META_DELIVERERS, samples[i].deliverers); } } - - if (!process || !process.env || !process.env.SKIP_NETWORK) { + else { for(var i in samples) { - compareMetadata(samples[i].url, METADATA_DELIVERERS, samples[i].deliverers); + compareMetadata(null, samples[i].file, META_PROFILE, samples[i].profile); + } + for(var i in samples) { + compareMetadata(null, samples[i].file, META_DELIVERERS, samples[i].deliverers); } } }); - // describe('Method "validate"', function() { - // it('Should exist and be a function'), function() { - // expect(specberus.validate).to.be.a('function'); - // }; - // }); + describe('Method "validate"', function() { + + it('Should exist and be a function', function(done) { + chai(specberus).to.have.property('validate').that.is.a('function'); + done(); + }); + + }); }); From a3df241ea80628a77ea1bbc992690e0217629012 Mon Sep 17 00:00:00 2001 From: tripu Date: Sun, 20 Mar 
2016 13:29:35 +0900 Subject: [PATCH 13/23] Remove API stuff --- app.js | 2 -- lib/api.js | 99 ----------------------------------------------------- test/api.js | 65 ----------------------------------- 3 files changed, 166 deletions(-) delete mode 100644 lib/api.js delete mode 100644 test/api.js diff --git a/app.js b/app.js index 2dd2a9f76..3844ce60c 100644 --- a/app.js +++ b/app.js @@ -19,7 +19,6 @@ const bodyParser = require('body-parser') // Internal packages: const package = require('./package.json') -, api = require('./lib/api') , l10n = require('./lib/l10n') , sink = require('./lib/sink') , validator = require('./lib/validator') @@ -47,7 +46,6 @@ app.use(morgan('combined')); app.use(compression()); app.use(bodyParser.json()); app.use(express.static("public")); -api.setUp(app); // listen up server.listen(process.argv[2] || process.env.PORT || DEFAULT_PORT); diff --git a/lib/api.js b/lib/api.js deleted file mode 100644 index 51bac2c92..000000000 --- a/lib/api.js +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Specberus REST API. - */ - -// Internal packages: -const package = require('../package.json') -, sink = require('./sink') -, validator = require('./validator') -; - -const Sink = sink.Sink -, version = package.version -; - -/** - * Build an "options" object based on an HTTP query string. - * - * @param {Object} query - an HTTP request query. - * @returns {Object} an "options" object that can be used by Specberus. - */ - -const parseSource = function(query) { - var result; - if (query.url) result = {url: query.url}; - else if (query.source) result = {source: query.source}; - else if (query.file) result = {file: query.file}; - else if (query.document) result = {document: query.document}; - return result; -}; - -/** - * Handle an API request: parse method and parameters; handle common errors. - * - * @param {Object} req - HTTP request. - * @param {Object} res - HTTP result. 
- */ - -const parseRequest = function(req, res) { - - var options - , v - , handler - ; - - if ('/api/version' === req.path) { - res.status(200).send(version); - } - - else if (!req.query) { - res.status(400).send('Missing parameters.'); - } - - else if ('/api/metadata' === req.path) { - options = parseSource(req.query); - if (0 === Object.keys(options).length) { - res.status(400).send('At least one of "url", "source", "file" or "document" must be specified.'); - } - else { - v = new validator.Specberus - handler = new Sink(function(data) { - res.status(500).send(data); - }, function(data) { - res.status(200).send(v.meta); - }) - ; - options.events = handler; - v.extractMetadata(options); - } - } - - else if ('/api/validate' === req.path) { - options = parseSource(req.query); - if (0 === Object.keys(options).length) { - res.status(400).send('At least one of "url", "source", "file" or "document" must be specified.'); - } - else { - v = new validator.Specberus - handler = new Sink(function(data) { - res.status(500).send(data); - }, function(data) { - res.status(200).end(); - }) - ; - options.events = handler; - v.validate(options); - } - } - - else { - res.status(404).send('Wrong API method.'); - } - -}; - -const setUp = function(app) { - app.post('/api/*', parseRequest); -}; - -exports.setUp = setUp; diff --git a/test/api.js b/test/api.js deleted file mode 100644 index 5e92b594c..000000000 --- a/test/api.js +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Test the REST API. - */ - -// External packages: -const expect = require('expect.js') -, superagent = require('superagent') -; - -// Internal packages: -const samples = require('./samples') -, package = require('../package') -; - -/** - * Assert that the profile detected in a spec is equal to the known profile. - * - * @param {String} url - public URL of a spec. - * @param {String} profile - profile that should be detected. 
- */ - -const detect = function(url, profile) { - it('Should detect a ' + profile, function () { - // @TODO; submit URL to endpoint and check profiles. - }); -}; - -if (!process || !process.env || !process.env.SKIP_NETWORK) { - - // @TODO: launch Specberus locally as a server, listening to HTTP requests. - - describe('API', function() { - -/* it('The endpoint should exist', function() { - // @TODO - }); - - describe('Method "version"', function() { - it('Should exist'), function() { - // @TODO - }; - it('Should return the right version string'), function() { - // @TODO; query method and compare with "package.version". - }; - }); - - describe('Method "metadata"', function() { - it('Should exist'), function(done) { - // @TODO - }; - for(var i in samples) { - detect(samples[i].url, samples[i].profile); - } - }); - - describe('Method "validate"', function() { - it('Should exist'), function(done) { - // @TODO - }; - // @TODO; submit a few sample specs for validation; check results. - }); */ - - }); - -} From 578db18c20fe6db9007fc1b29998e8b20ce9b28f Mon Sep 17 00:00:00 2001 From: tripu Date: Sun, 20 Mar 2016 13:44:01 +0900 Subject: [PATCH 14/23] This sample isn't necessary (there's another WD) --- test/docs/metadata/appmanifest.html | 3475 --------------------------- test/samples.json | 13 +- 2 files changed, 1 insertion(+), 3487 deletions(-) delete mode 100644 test/docs/metadata/appmanifest.html diff --git a/test/docs/metadata/appmanifest.html b/test/docs/metadata/appmanifest.html deleted file mode 100644 index cbce2389c..000000000 --- a/test/docs/metadata/appmanifest.html +++ /dev/null @@ -1,3475 +0,0 @@ - - - - - - Web App Manifest - - - - - -

Abstract

-

- This specification defines a JSON-based manifest file that provides - developers with a centralized place to put metadata associated with a - web application. This metadata includes, but is not limited to, the web - application's name, links to icons, as well as the preferred URL to - open when a user launches the web application. The manifest also allows - developers to declare a default orientation for their web application, - as well as providing the ability to set the display mode for the - application (e.g., in fullscreen). Additionally, the manifest allows a - developer to "scope" a web application to a URL. This restricts the - URLs to which the manifest is applied and provides a means to "deep - link" into a web application from other applications. -

-

- Using this metadata, user agents can provide developers with means to - create user experiences that are more comparable to that of a native - application. -

-

- To associate documents of a web application with a manifest, this - specification defines the manifest link type as a - declarative means for a document to be associated with a manifest. -

-

Status of This Document

-

- This section describes the status of this document at the time of its publication. Other documents may supersede this document. A list of current W3C publications and the latest revision of this technical report can be found in the W3C technical reports index at http://www.w3.org/TR/. -

- -
Warning
-

- Implementors need to be aware that this specification is not stable. - However, aspects of this specification are shipping in at least one - browser (see links to implementation status at the top of this - document). Implementors who are not taking part in the - discussions will find the specification changing out from under them - in incompatible ways. Vendors interested in implementing - this specification before it eventually reaches the Candidate - Recommendation phase should subscribe to the repository - on GitHub and take part in the discussions. -

-
- -

- This document was published by the Web Platform Working Group as a Working Draft. - This document is intended to become a W3C Recommendation. - If you wish to make comments regarding this document, please send them to - public-webapps@w3.org - (subscribe, - archives). - - All comments are welcome. -

-

- Publication as a Working Draft does not imply endorsement by the W3C - Membership. This is a draft document and may be updated, replaced or obsoleted by other - documents at any time. It is inappropriate to cite this document as other than work in - progress. -

-

- This document was produced by - a group - operating under the - 5 February 2004 W3C Patent - Policy. - W3C maintains a public list of any patent - disclosures - made in connection with the deliverables of - the group; that page also includes - instructions for disclosing a patent. An individual who has actual knowledge of a patent - which the individual believes contains - Essential - Claim(s) must disclose the information in accordance with - section - 6 of the W3C Patent Policy. -

-

This document is governed by the 1 September 2015 W3C Process Document. -

- -
- -
-

1. - Usage Examples -

This section is non-normative.

-

- This section shows how developers can make use of the various features - of this specification. -

-
-

1.1 - Example manifest -

This section is non-normative.

-

- The following shows a typical manifest. -

-
Example 1: typical manifest
{
-  "lang": "en",
-  "name": "Super Racer 2000",
-  "short_name": "Racer2K",
-  "icons": [{
-        "src": "icon/lowres",
-        "sizes": "64x64",
-        "type": "image/webp"
-      }, {
-        "src": "icon/hd_small",
-        "sizes": "64x64"
-      }, {
-        "src": "icon/hd_hi",
-        "sizes": "128x128",
-        "density": 2
-      }],
-  "splash_screens": [{
-        "src": "splash/lowres",
-        "sizes": "320x240"
-      }, {
-        "src": "splash/hd_small",
-        "sizes": "1334x750"
-      }, {
-        "src": "splash/hd_hi",
-        "sizes": "1920x1080",
-        "density": 3
-      }],
-  "scope": "/racer/",
-  "start_url": "/racer/start.html",
-  "display": "fullscreen",
-  "orientation": "landscape",
-  "theme_color": "aliceblue",
-  "background_color": "red"
-}
-
- -
-
-

2. - Installable web applications -

-

- This document attempts to address the Use Cases and - Requirements for Installable Web Apps. -

-

- A web application is installed if the - user agent has provided the end-user with a means of instantiating a - new top-level browsing context that has the manifest's members - applied to it. That is, the manifest's members, or their defaults, are - in effect on the top-level browsing context (see application - context). -

-

- An example of installing would be a user agent that allows the - end-user to add a shortcut to a web application on their device's - homescreen (using the name and one of the icons found in the manifest). - Then, when the end-user launches a web application through this - explicit means, the manifest is applied to the browsing context prior - to the start URL being loaded. This gives the user agent time to - apply the relevant values of the manifest, possibly changing the - display mode and screen orientation of the web application. -

-

- Alternatively, an application context can be launched through a deep - link (a URL that is within scope of the installed web application); - in which case, the manifest is applied and the deep link is loaded - within the context of web application. -

-
-

2.1 - Authority of the manifest's metadata -

-

- When a manifest is linked from a Document, it - indicates to the user agent that the metadata is - authoritative: that is, the user agent SHOULD use the - metadata of the manifest instead of the one in the - Document. However, in cases where metadata is missing, - or in error, a user agent MAY fallback to the Document - to find suitable replacements for missing manifest members (e.g., - using application-name in place of - short_name). -

-
-
-

2.2 - Installability signals -

This section is non-normative.

-

- By design, this specification does not provide developers with an - explicit API to "install" a web application. Instead, a - manifest can serve as an installability signal to a user agent - that a web application can be installed. -

-

- Examples of installability signals for a web application: -

-
    -
  • is associated with a manifest with at least a - name member and a suitable icon. -
  • -
  • is served over a secure network connection. -
  • -
  • has a sensible content security policy. -
  • -
  • is able to responsibly adapt to display on a variety of screen - sizes, catering for both mobile and desktop. -
  • -
  • is able to function without a network connection. -
  • -
  • is repeatedly used by the end-user over some extended period of - time. -
  • -
  • has been explicitly marked by the user as one that they value and - trust (e.g., by bookmarking or "starring" it). -
  • -
-

- This list is not exhaustive and some installability signals - might not apply to all user agents. How a user agent makes use of - these installability signals to determine if a web application - can be installed is left to implementers. -

-
-
- -
-

4. - Display modes -

-

- A display mode represents how the web application is being - presented within the context of an OS (e.g., in fullscreen, etc.). - Display modes correspond to user interface (UI) metaphors and - functionality in use on a given platform. The UI conventions of the - display modes are purely advisory and implementers are free to - interpret them how they best see fit. -

-

- Once a user agent applies a particular display mode to an - application context, it becomes the default display - mode for the top-level browsing context (i.e., it is used - as the display mode when the window is navigated). The user - agent MAY override the default display mode for security reasons - (e.g., the top-level browsing context is navigated to - another origin) and/or the user agent MAY provide the user with a means - of switching to another display mode. -

-

- When the display member is missing, or if there is no - valid display member, the user agent uses the - browser display mode as the default display - mode. As such, the user agent is REQUIRED to support the - browser display mode. -

-

- Each display mode, except browser, has a - fallback display mode, - which is the display mode that the user agent can try to use if - it doesn't support a particular display mode. If the user agent - does not support a fallback display mode, then it checks to see if - it can use that display mode's fallback display mode. - This creates a fallback chain, with the default display mode - (browser) being the last item in the chain. -

-
-

- For example, Awesome Browser only supports the - minimal-ui and browser display modes, but a - developer declares that she wants fullscreen in the - manifest. In this case, the user agent will first check if it - supports fullscreen (it doesn't), so it falls back to - standalone (which it also doesn't support), and - ultimately falls back to minimal-ui. -

-
-

- The display modes values and their corresponding fallback - display modes are as follows: -

-
-
- fullscreen -
-
- Opens the web application without any user agent chrome and takes up - the entirety of the available display area. -
-
- The fallback display mode for fullscreen is - standalone. -
-
- standalone -
-
- Opens the web application to look and feel like a standalone native - application. This can include the application having a different - window, its own icon in the application launcher, etc. In this mode, - the user agent will exclude UI elements for controlling navigation, - but can include other UI elements such as a status bar. -
-
- The fallback display mode for standalone is - minimal-ui. -
-
- minimal-ui -
-
- This mode is similar to fullscreen, but provides the end-user - with some means to access a minimal set of UI elements for - controlling navigation (i.e., back, forward, reload, and perhaps some - way of viewing the document's address). A user agent can include - other platform specific UI elements, such as "share" and "print" - buttons or whatever is customary on the platform and user agent. -
-
- The fallback display mode for minimal-ui is - browser. -
-
- browser -
-
- Opens the web application using the platform-specific convention for - opening hyperlinks in the user agent (e.g., in a browser tab or a new - window). -
-
- The browser display mode doesn't have a - fallback display mode (conforming user agents are required to - support the browser display mode). -
-
-
Note

- The fullscreen display mode is orthogonal to, and works - independently of, the [WHATWG-FULLSCREEN] API. The - fullscreen display mode affects the - fullscreen state of the browser window, while the [WHATWG-FULLSCREEN] - API operates on an element contained within the viewport. As such, a - web application can have its display mode set to - fullscreen, while - document.fullscreenElement returns null, and - fullscreenEnabled returns false. -

-
-

4.1 - The 'display-mode' media feature -

-

- The display-mode media feature represents, - via a CSS media query [MEDIAQ], the display mode of the web - application. This media feature applies to the top-level browsing - context and any child browsing contexts. Child browsing contexts - reflect the display mode of the top-level browsing - context. -

-

- A user agent MUST expose the 'display-mode' media - feature irrespective of whether a manifest is being applied to a - browsing context. For example, if the end-user puts the whole user - agent into fullscreen, then the user agent would reflect this change - to CSS and scripts via the 'display-mode' media feature. -

-
Note
-

- Please note that the fullscreen display mode is - not directly related to the CSS :fullscreen - pseudo-class specified in the [WHATWG-FULLSCREEN] API. The - :fullscreen pseudo-class matches exclusively when a - [HTML] element is put into the fullscreen element stack. - However, a side effect of calling the - requestFullscreen() method on an element using the - [WHATWG-FULLSCREEN] API is that the browser window can enter a - fullscreen mode at the OS-level. In such a case, both - :fullscreen and (display-mode: - fullscreen) will match. -

-

- On some platforms, it is possible for a user to put a browser - window into fullscreen without the aid of the [WHATWG-FULLSCREEN] - API. When this happens, the :fullscreen pseudo class - will not match, but (display-mode: fullscreen) will - match. This is exemplified in CSS code below. -

-
Example 3
/* applies when the window is fullscreen */
-@media all and (display-mode: fullscreen) {
-    ...
-}
-
-/* applies when an element goes fullscreen */
-#game:fullscreen{
-    ...
-}
-
-
-
- Value: -
-
- fullscreen | standalone | minimal-ui | - browser -
-
- Applies to: -
-
- visual media types -
-
- Accepts min/max prefixes: -
-
- No -
-
-

- A user agent MUST reflect the applied display mode of the web - application via a CSS media query [MEDIAQ]. -

-
-

4.1.1 - Examples -

-

- An example in CSS: -

-
Example 4
@media all and (display-mode: minimal-ui) {
-  /* ... */
-}
-@media all and (display-mode: standalone) {
-  /* ... */
-}
-

- Accessing the display-mode media feature in ECMAScript through - matchMedia() of [cssom-view]: -

-
Example 5
const standalone = matchMedia( '(display-mode: standalone)' );
-
-standalone.onchange = (e) => {
-  /* handle changes to display mode */
-}
-
-if (standalone.matches) {
-  /* do standalone things */
-}
-
-
-

4.1.2 - Security and privacy considerations -

-

- The 'display-mode' media feature allows an origin - access to aspects of a user’s local computing environment and, - together with the display member, allows an origin - some measure of control over a user agent’s native UI: Through a - CSS media query, a script can know the display mode of a web - application. An attacker could, in such a case, exploit the fact - that an application is being displayed in fullscreen to mimic the - user interface of another application. -

-

- Furthermore, by neglecting to define a scope member in the - manifest, it's possible to put a web application into a display - mode that persists cross-origin (for legacy reasons, this is - the default behavior). In cases where the navigation scope is - unbounded, it is left to the user agent to either stop applying the - manifest when a cross-origin navigation occurs or to show some sort - of security warning to the user. -

-
-
-
-
-

5. - Associating a resource with a manifest -

-

- A resource is said to be associated with a manifest if the - resource representation, an HTML document, has a manifest link relationship. -

-
-

5.1 - Linking to a manifest -

-

- The manifest keyword can be used with a [HTML] - link element. This keyword creates an external - resource link. -

- - - - - - - - - - - - - - - - -
- Link type - - Effect on... - - Brief description -
- link - - a and area -
- manifest - - External Resource - - not allowed - - Imports or links to a manifest. -
-

- The media type for a manifest serves as the default media type - for resources associated with the manifest link type. -

-
Note

- In cases where more than one link element with a - manifest link type appears in a Document, - the user agent uses the first link element in - tree order and ignores all subsequent link - elements with a manifest link type (even if the first - element was erroneous). See the steps for obtaining a - manifest. -

-

- To obtain a manifest, the user agent MUST run the steps for - obtaining a manifest. The appropriate time to obtain the manifest - is left up to implementations. A user agent MAY opt to delay fetching - a manifest until after the document and its other resources have been - fully loaded (i.e., to not delay the availability of content and - scripts required by the document). -

-

- A manifest is obtained and applied regardless of whether the - media attribute of the link - element matches the environment or not. -

-
-
-
-

6. - Manifest life-cycle -

-

- This section defines algorithms for obtaining, - processing, and applying a - manifest, and gives recommendations to implementers on how to - react when the manifest is updated. -

-
-

6.1 - Obtaining a manifest -

-

- The steps for obtaining a manifest are given by the - following algorithm. The algorithm, if successful, returns a - processed manifest and the manifest URL; otherwise, - it terminates prematurely and returns nothing. In the case of nothing - being returned, the user agent MUST ignore the manifest declaration. In - running these steps, a user agent MUST NOT delay the load - event. -

-
    -
  1. From the Document of the top-level browsing - context, let manifest link be the first - link element in tree order whose - rel attribute contains the token manifest. -
  2. -
  3. If manifest link is null, terminate this - algorithm. -
  4. -
  5. If manifest link's href attribute's value - is the empty string, then abort these steps. -
  6. -
  7. Let manifest URL be the result of parsing the - value of the href attribute, relative to the - element's base URL. If parsing fails, then abort these steps. -
  8. -
  9. Let request be a new [FETCH] request, whose - URL is manifest URL, and whose context is - "manifest". -
  10. -
  11. If the manifest link's crossOrigin - attribute's value is 'use-credentials', then set - request's credentials to 'include'. -
  12. -
  13. Await the result of performing a fetch with - request, letting response be the result. -
  14. -
  15. If response is a network error, terminate this - algorithm. -
  16. -
  17. Let manifest be the result of running the steps for - processing a manifest with response's body as the - text, manifest URL, and the URL that represents - the address of the top-level browsing context. -
  18. -
  19. Return manifest and manifest URL. -
  20. -
-
Note
-

- Authors are encouraged to use the HTTP cache directives to - explicitly cache the manifest. For example, the following response - would cause a cached manifest to be used for up to 30 days from the time the - response is sent: -

-
HTTP/1.1 200 OK
-Cache-Control: max-age=2592000
-Content-Type: application/manifest+json
-
-{
-  "lang": "en",
-  "name": "Super Racer 2000",
-  "start_url": "/start.html",
-  "display": "fullscreen",
-  "orientation": "landscape"
-}
-
-
-

6.1.1 - Content security policy -

-

- A user agent MUST support [CSP3]. -

-
-

- The manifest-src and - default-src directives govern the origins - from which a user agent can fetch a manifest. As with - other directives, by default the manifest-src - directive is *, meaning that a user agent can, - [CORS] permitting, fetch the manifest cross-domain. Remote - origins (e.g., a CDN) wanting to host manifests - for various web applications will need to include the appropriate - [CORS] response header in their HTTP response (e.g., - Access-Control-Allow-Origin: https://example.com). -

-
- manifest-src directive example illustrated -
Fig. 1 - For a [HTML] document, [CSP3]'s - manifest-src directive controls the sources - from which a [HTML] document can load a manifest. The - same CSP policy's img-src directive controls where - the icon's images can be fetched from. -
-
-
-
-
-
-

6.2 - Processing the manifest -

-

- When instructed to issue a developer warning, the user - agent MAY report the conformance violation to the developer in a - user-agent-specific manner (e.g., report the problem in an error - console), or MAY ignore the error and do nothing. -

-

- When instructed to ignore, the user agent MUST act as if - whatever manifest, member, or value caused the condition is absent. -

-

- The following algorithm provides an extension point: other - specifications that add new members to the manifest are encouraged to - hook themselves into this specification at this point in the - algorithm. -

-
Note
-

- The extension point is meant to help avoid issues related to - monkey patching. -

-
-

- The steps for processing a manifest are given by the - following algorithm. The algorithm takes a text string as - an argument, which represents a manifest, and a URL - manifest URL, which represents the location of the - manifest, and a URL document URL. The output from - inputting a JSON document into this algorithm is a processed - manifest. -

-
    -
  1. Let parsed manifest be an empty object. -
  2. -
  3. Let manifest be the result of - parsing text. If - parsing throws an error: -
      -
    1. - Issue a developer warning with any details pertaining to - the JSON parsing error. -
    2. -
    3. Set manifest to be the result of - parsing the string "{}". -
    4. -
    -
  4. -
  5. If Type(manifest) is not "object": -
      -
    1. - Issue a developer warning that the manifest needs to be - an object. -
    2. -
    3. Set manifest to be the result of - parsing the string "{}". -
    4. -
    -
  6. -
  7. - Extension point: process any proprietary and/or other - supported members at this point in the algorithm. -
  8. -
  9. Let start URL of parsed manifest be the - result of running the steps for processing the - start_url member with manifest, - manifest URL, and document URL as arguments. -
  10. -
  11. Let display mode of parsed manifest be the - result of running the steps for processing the - display member with manifest as the - argument. -
  12. -
  13. Let orientation of parsed manifest be the - result of running the steps for processing the - orientation member with manifest and - display mode as arguments. -
  14. -
  15. Let name of parsed manifest be the result - of running the steps for processing the name - member with manifest as the argument. -
  16. -
  17. Let language of parsed manifest be the - result of running the steps for processing the lang - member with manifest as the argument. -
  18. -
  19. Let short name of parsed manifest be the - result of running the steps for processing the - short_name member with manifest as the - argument. -
  20. -
  21. Let icons of parsed manifest be the result - of running the steps for processing an array of images with - manifest, manifest URL, and "icons" as - arguments. -
  22. -
  23. Let splash screens of parsed manifest be - the result of running the steps for processing an array of - images with manifest, manifest URL, and - "splash_screens" as arguments. -
  24. -
  25. Let scope of parsed manifest be the result - of running the steps for processing the scope - member with manifest, manifest URL, - document URL, start URL as arguments. -
  26. -
  27. Let related applications of parsed manifest - be the result of running the steps for processing the - related_applications member with manifest - as argument. -
  28. -
  29. Let prefer related applications of parsed - manifest be the result of running the steps for processing - the prefer_related_applications member with - manifest as argument. -
  30. -
  31. Let theme color of parsed manifest be the - result of running the steps for processing the - theme_color member with manifest as - argument. -
  32. -
  33. Let background color of parsed manifest be - the result of running the steps for processing the - background_color member with manifest as - argument. -
  34. -
  35. Return parsed manifest. -
  36. -
-
-
-

6.3 - Applying the manifest -

-

- A manifest is applied to a - top-level browsing context, meaning that the members of the - manifest are affecting the presentation or behavior of a - browsing context. -

-

- A top-level browsing context that has a manifest applied to it - is referred to as an application context. -

-

- If an application context is created as a result of the user - agent being asked to navigate to a deep link, the user - agent MUST immediately navigate to the deep link with - replacement enabled. Otherwise, when the application - context is created, the user agent MUST immediately - navigate to the start URL with replacement - enabled. -

-
-

- Please note that the start URL is not necessarily the value - of the start_url member: the user or user agent - could have changed it when the application was added to home-screen - or otherwise bookmarked. -

-
-

- The appropriate time to apply a manifest is when the - application context is created and before navigation to - the start URL begins. -

-
-
-

6.4 - Updating the manifest -

-
Issue 384: Updating is under/incorrectly specified

The spec says that the user agent may "periodically check if the contents of a manifest has been modified (e.g., by honoring HTTP cache directives associated with the manifest or by checking for updates after the web application has been launched)."

- -

Firstly, being able to periodically check the contents of the manifest requires that the manifest URL does not change. If the manifest URL changes, there's no way to know that any new manifest corresponds to the same app as the old manifest and the only way for the app's metadata to be updated is for the user to re-install the app.

- -

The removal of the same-origin restriction on manifest URLs was intended mainly to allow CDNs to host the manifest on a separate origin to the app, but the above assumes that CDNs are well behaved in that they never change the URL of the manifest, or as a minimum provide a redirect. @slightlylate assures me that CDNs these days are well behaved and do not change URLs of resources. Does anyone have any additional data on that?

- -

So assuming the manifest URL does not change, or at least provides redirects, the user agent can "periodically check the contents" of the manifest to see whether something has been updated. Then the spec says that "in the event that the members of the manifest have been updated, as determined by running the steps for processing a manifest and seeing if anything has changed, the user agent MAY update the metadata corresponding to the web application ".

- -

Now during the "steps for processing the manifest" the spec says that the start_url must be checked to be same origin with the document URL, that is the document the app was installed from. The same is true for the scope property. If the user agent is just "periodically checking the contents" of the manifest, does it need to also keep a record of the URL of the document the app was installed from, in order to make these checks? Note that this URL may be different for every installation as an app can be installed from any page of the app.

- -

So assuming that CDNs behave well, the manifest URL doesn't change, and we've stored the document URL, we can then periodically check the contents of the manifest. Does this work for authenticated manifest URLs too? How does the user agent get the necessary credentials to fetch the manifest? Is the check only done by the user agent when the user logs in and uses the app?

- -

It sounds like the implementation in Chrome may also be based on some additional assumptions regarding a registered Service Worker scope, is that the case? @mounirlamouri ?

- -

If the assumptions above are in fact assumptions made by the spec, do we need to write something in the spec about the fact that user agents do not expect the manifest URL to change, and that therefore CDNs are expected to keep the same URL for the manifest or provide a redirect?

-

- By using the manifest URL, a user agent MAY periodically check - if the contents of a manifest have been modified (e.g., by honoring - HTTP cache directives associated with the manifest or by checking for - updates after the web application has been launched). In the event - that the members of the manifest have been updated, as determined by - running the steps for processing a manifest and seeing if - anything has changed, the user agent MAY update the metadata - corresponding to the web application (e.g., by replacing the name, - icons, navigation scope, or whatever other data has been - changed). -

-

- In addition, even if the manifest has not been modified, the user - agent MAY periodically check if resources referenced from a manifest - (e.g., the icons) have been modified by honoring HTTP cache - directives. If any resources have been modified, the user agent MAY - replace any stale resources. -

-

- To avoid one application masquerading as another, it is RECOMMENDED - that users be made aware of any such updates using implementation or - platform specific conventions. -

-
-
-
-

7. - Manifest and its members -

-

- A manifest is a JSON document that contains startup - parameters and application defaults for when a web application is - launched. A manifest consists of a top-level object that - contains zero or more members. Each of the members is defined below, - as well as how their values are processed. -

-

- Every manifest has an associated manifest URL, which is the - [URL] from which the manifest was fetched. -

-
-

7.1 - lang member -

-

- The lang member is a - language tag (string) that specifies the primary language for - the values of the manifest's name and - short_name members. -

-

- A language tag is a string that matches the production of - a Language-Tag defined in the [BCP47] specifications - (see the IANA - Language Subtag Registry for an authoritative list of possible - values, see also the - Maintenance Agency for ISO 3166 country codes). That is, a - language range is composed of one or more subtags that are - delimited by a U+002D HYPHEN-MINUS ("-"). For example, the - 'en-AU' language range represents English as spoken in - Australia, and 'fr-CA' represents French as spoken in - Canada. Language tags that meet the validity criteria of [RFC5646] - section 2.2.9 that can be verified without reference to the IANA - Language Subtag Registry are considered structurally valid. -

-

- The steps for processing the lang member are - given by the following algorithm. The algorithm takes a - manifest as an argument. This algorithm returns a string - or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of manifest - with argument "lang". -
  2. -
  3. If Type(value) is not "string": -
      -
    1. If Type(value) is not "undefined", - optionally issue a developer warning that the type is not - supported. -
    2. -
    3. Return undefined. -
    4. -
    -
  4. -
  5. Otherwise, Trim(value) and let tag - be the result. -
  6. -
  7. If calling IsStructurallyValidLanguageTag with - tag as the argument returns false, then: -
      -
    1. - Issue a developer warning that the value is - invalid. -
    2. -
    3. Return undefined. -
    4. -
    -
  8. -
  9. Otherwise, return the result of calling the - CanonicalizeLanguageTag abstract operation, passing - tag as the argument. -
  10. -
-
-
-

7.2 - name member -

-

- The name member is a - string that represents the name of the web application as it - is usually displayed to the user (e.g., amongst a list of other - applications, or as a label for an icon). -

-

- The steps for processing the name member are - given by the following algorithm. The algorithm takes a - manifest as an argument. This algorithm returns a string - or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of manifest - with argument "name". -
  2. -
  3. If Type(value) is not "string": -
      -
    1. If Type(value) is not "undefined", - optionally issue a developer warning that the type is not - supported. -
    2. -
    3. Return undefined. -
    4. -
    -
  4. -
  5. Otherwise, Trim(value) and return the result. -
  6. -
-
-
-

7.3 - short_name member -

-

- The short_name member - is a string that represents a short version of the name of the - web application. It is intended to be used where there is - insufficient space to display the full name of the web application. -

-

- The steps for processing the short_name - member are given by the following algorithm. The algorithm takes - a manifest as an argument. This algorithm returns a string - or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of manifest - with argument "short_name". -
  2. -
  3. If Type(value) is not "string": -
      -
    1. If Type(value) is not "undefined", - optionally issue a developer warning that the type is not - supported. -
    2. -
    3. Return undefined. -
    4. -
    -
  4. -
  5. Otherwise, Trim(value) and return the result. -
  6. -
-
-
-

7.4 - scope member -

-
Issue 380: Provide better examples of scope

People are confused by the lack of examples relating to scope:
-manifoldjs/ManifoldJS#42 (comment)

-

- The scope member is a - string that represents the navigation scope of this web application's - application context. -

-

- The steps for processing the scope member are - given by the following algorithm. The algorithm takes a - manifest manifest, a URL manifest - URL, a URL document URL, and a URL - start URL. This algorithm returns a URL or - undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of the manifest - with argument "scope". -
  2. -
  3. Let type be Type(value). -
  4. -
  5. If type is not "string" or value is the - empty string, then: -
      -
    1. If type is not "undefined", issue - a developer warning that the type is unsupported. -
    2. -
    3. Return undefined. -
    4. -
    -
  6. -
  7. Let scope URL be a new URL using - value as input and manifest URL as - the base URL. -
  8. -
  9. If scope URL is failure, issue a developer warning and return undefined. -
  10. -
  11. If scope URL is not same - origin as document URL: -
      -
    1. - Issue a developer warning that the scope - needs to be - same-origin as Document of the application - context. -
    2. -
    3. Return undefined. -
    4. -
    -
  12. -
  13. If start URL is not within scope of scope URL: -
      -
    1. - Issue a developer warning that the start URL is not - within scope of the navigation scope. -
    2. -
    3. Return undefined. -
    4. -
    -
  14. -
  15. Otherwise, return scope URL. -
  16. -
-
-
-

7.5 - splash_screens member -

-
Issue 372: A means to control how long a splash screen should remain on screen.

An app might want to show its splash screen for a minimum amount of time.

-

- The splash_screens - member is an array of image objects that can - serve as a loading screen for the web application. A splash screen - indicates to the end user that a loading process is occurring (in - effect, that the web application is being prepared by the user agent - in the background). As the splash_screens member is an - array of image objects, developers can use unique image - objects definitions to target minimum screen resolutions and pixel - densities. -

-

- This feature is primarily intended for user agents that take a - perceptible amount of time to self-initialize (e.g., take longer than - 200ms to be able to render content) - as such, it is OPTIONAL for a - user agent to display a splash screen while the web application is - being loaded. On launching a web application, if displaying a splash - screen, the user agent MUST queue a post-load task on the - Document of the start URL to remove the - splash screen, or, alternatively, can follow platform conventions for - how long a splash screen is displayed (e.g., a minimum of 1 second). -

-
Note

- The splash_screens member is processed using the steps - for processing an array of images. -

-
-
-

7.6 - icons member -

-

- The icons member is an - array of image objects that can serve as iconic - representations of the web application in various contexts. For - example, they can be used to represent the web application amongst a - list of other applications, or to integrate the web application with - an OS's task switcher and/or - system preferences. -

-
Note

- The icons member is processed using the steps for - processing an array of images. -

-

- If there are multiple equally appropriate icons in icons, - a user agent MUST use the last one declared in order at the time that - the user agent collected the list of icons. If the user - agent tries to use an icon but that icon is determined, upon closer - examination, to in fact be inappropriate (e.g. because its content - type is unsupported), then the user agent MUST try the - next-most-appropriate icon as determined by examining the image - object's members. -

-
-

- In the following example, the developer has made the following - choices about the icons associated with the web application: -

-
    -
  • The developer has included two icons at the same size, but in - two different formats. One is explicitly marked as WebP through the - type member. If the user agent doesn't support WebP, - it falls back to the second icon of the same size (and density). - The media type of this icon can then be either determined via an - HTTP header, or can be sniffed by the user agent once the first few - bytes of the icon are received. -
  • -
  • The developer wants to use an SVG icon for devices with at - least 2dppx as the display density and only when the available - dimensions are at least 72px. She has found that the SVG file looks - too blurry at small sizes, even on high-density screens. To deal - with this problem, she's included an SVG icon that is only used - when the dimensions are at least 72px and the pixel density is at - least 2dppx. Otherwise, the user agent uses the ICO file - (hd_hi.ico), which includes a gamut of icons individually tailored - for small display sizes. -
  • -
-
{
-  "icons": [
-      {
-        "src": "icon/lowres.webp",
-        "sizes": "48x48",
-        "type": "image/webp"
-      },{
-        "src": "icon/lowres",
-        "sizes": "48x48"
-      },{
-        "src": "icon/hd_hi.ico",
-        "sizes": "72x72 96x96 128x128 256x256"
-      },{
-        "src": "icon/hd_hi.svg",
-        "sizes": "72x72",
-        "density": 2
-      }]
- }
-
-
-
-
-

7.7 - display member -

-

- The display member is a - string, whose value is one of display modes values. The - item represents the developer's preferred display mode for the - web application. When the member is missing or erroneous, the user - agent MUST use the fallback display mode. -

-

- The steps for processing the display member - are given by the following algorithm. The algorithm takes a manifest - manifest as an argument, and returns a string. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of manifest - passing "display" as the argument. -
  2. -
  3. If Type(value) is not "string" or - Trim(value) is not part of the display modes - values: -
      -
    1. If Type(value) is not "undefined", issue - a developer warning that the type is unsupported. -
    2. -
    3. If value is not part of the display modes - values, issue a developer warning that the value is - unsupported. -
    4. -
    5. Return the fallback display mode's value. -
    6. -
    -
  4. -
  5. Otherwise, Trim(value) and set value - to be the result. -
  6. -
  7. If value is not a display mode that the user - agent supports, set value to value's - fallback display mode and re-run this step. -
  8. -
  9. Return value. -
  10. -
-
-
-

7.8 - orientation member -

-

- The orientation - member is a string that serves as the default - orientation for all top-level browsing contexts of the web - application. The possible values are those of the - OrientationLockType enum defined in - [SCREEN-ORIENTATION]. -

-

- If the user agent honors the value of the orientation - member as the default orientation, then that serves as the - default orientation for the life of the web application - (unless overridden by some other means at runtime). This means that - the user agent MUST return the orientation to the default - orientation any time the orientation is unlocked - [SCREEN-ORIENTATION] or the top-level browsing context is - navigated. -

-

- Although the specification relies on the [SCREEN-ORIENTATION]'s - OrientationLockType, it is OPTIONAL for a user - agent to implement the [SCREEN-ORIENTATION] API. Supporting the - [SCREEN-ORIENTATION] API is, of course, RECOMMENDED. -

-

- Certain UI/UX concerns and/or platform conventions will mean that - some screen orientations and display modes cannot be used - together . Which orientations and display modes cannot be used - together is left to the discretion of implementers. For example, for - some user agents, it might not make sense to change the default - orientation of an application while in browser - display mode. -

-
Note

- Once the web application is running, other means can change the - orientation of a top-level browsing context (such as via - [SCREEN-ORIENTATION] API). -

-

- The steps for processing the orientation - member are given by the following algorithm. The algorithm - takes a manifest manifest and a display mode - display mode as arguments, and returns a string. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of manifest - with argument "orientation". -
  2. -
  3. If Type(value) is not "string": -
      -
    1. If Type(value) is not "undefined", issue - a developer warning that the type is not supported. -
    2. -
    3. Return the empty string. -
    4. -
    -
  4. -
  5. Otherwise, Trim(value) and set value - to be the result. -
  6. -
  7. If value is not one of the - OrientationLockType enum values, or - value is unsupported by the user agent, or the - value cannot be used together with display - mode: -
      -
    1. - Issue a developer warning. -
    2. -
    3. Return the empty string. -
    4. -
    -
  8. -
  9. Return value. -
  10. -
-
-
-

7.9 - start_url member -

-

- The start_url member is a string that - represents the start URL, which is the URL that the - developer would prefer the user agent load when the user launches the - web application (e.g., when the user clicks on the icon of the web - application from a device's application menu or homescreen). -

-

- The start_url member is purely advisory, and a - user agent MAY ignore it or provide the end-user the choice - not to make use of it. A user agent MAY also allow the end-user to - modify the URL when, for instance, a bookmark for the web application - is being created or any time thereafter. -

-

- The steps for processing the start_url member - are given by the following algorithm. The algorithm takes a - manifest manifest, a URL manifest - URL , and a URL document URL. This algorithm - returns a URL. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of the manifest - with argument "start_url". -
  2. -
  3. Let type be Type(value). -
  4. -
  5. If type is not "string" or value is the - empty string: -
      -
    1. If type is not "undefined", issue - a developer warning that the type is unsupported. -
    2. -
    3. Return a new URL whose input is - document URL. -
    4. -
    -
  6. -
  7. Let url be a new URL using value as - input and manifest URL as the base - URL. -
  8. -
  9. If url is failure: - -
  10. -
  11. If url is not same origin as document URL: -
      -
    1. - Issue a developer warning that the - start_url needs to be same-origin as - Document of the top-level browsing context. -
    2. -
    3. Return a new URL whose input is - document URL. -
    4. -
    -
  12. -
  13. Otherwise, return url. -
  14. -
-
-

- For example, if the value of start_url is - ../start_point.html, and the manifest's URL is - https://example.com/resources/manifest.webmanifest, - then the result of URL parsing would be - https://example.com/start_point.html. -

-
-
-
-

7.10 - theme_color member -

-

- The theme_color - member serves as the default theme color for an - application context. What constitutes a theme color is - defined in [META-THEME-COLOR]. -

-

- If the user agent honors the value of the theme_color - member as the default theme color, then that color serves as - the theme color for all browsing contexts to which the - manifest is applied. However, a document may override the - default theme color through the inclusion of a [HTML] - meta element that conforms to [META-THEME-COLOR]. A - user agent MUST return the theme color to the default theme - color when there are no meta elements that conform - to [META-THEME-COLOR] in a document, or the top-level browsing - context is navigated to a URL that is within scope. -

-

- The steps for processing the theme_color - member are given by the following algorithm. The algorithm - takes a manifest as an argument. This algorithm returns a - string or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of manifest - with argument "theme_color". -
  2. -
  3. If Type(value) is not "string": -
      -
    1. If Type(value) is not "undefined", - optionally issue a developer warning that the type is not - supported. -
    2. -
    3. Return undefined. -
    4. -
    -
  4. -
  5. Otherwise, let potential color be the result of - running [CSS-SYNTAX-3]'s parse a component value algorithm - with value as input. If parsing returns a syntax error, - return undefined. -
  6. -
  7. Let color be the result of attempting to parse - potential color as a CSS color, as per [CSS-SYNTAX-3]. - If parsing fails: -
      -
    1. - Issue a developer warning. -
    2. -
    3. Return undefined. -
    4. -
    -
  8. -
  9. Return color. -
  10. -
-
- - -
-

7.13 - background_color member -

-

- The background_color - member describes the expected background color of the web - application. It repeats what is already available in the application - stylesheet but can be used by the user agent to draw the - background color of a web application for which the manifest is known - before the files are actually available, whether they are fetched - from the network or retrieved from disk. -

-

- The background_color member is only meant to improve the - user experience while a web application is loading and MUST NOT be - used by the user agent as the background color when the web - application's stylesheet is available. -

-

- The steps for processing the background_color - member are given by the following algorithm. The algorithm - takes a manifest as an argument. This algorithm returns a - string or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of manifest - with argument "background_color". -
  2. -
  3. If Type(value) is not "string": -
      -
    1. If Type(value) is not "undefined", - optionally issue a developer warning that the type is not - supported. -
    2. -
    3. Return undefined. -
    4. -
    -
  4. -
  5. Otherwise, let potential color be the result of - running [CSS-SYNTAX-3]'s parse a component value algorithm - with value as input. If parsing returns a syntax error, - return undefined. -
  6. -
  7. Let color be the result of attempting to parse - potential color as a CSS color, as per [CSS-SYNTAX-3]. - If parsing fails: -
      -
    1. - Issue a developer warning. -
    2. -
    3. Return undefined. -
    4. -
    -
  8. -
  9. Return color. -
  10. -
-
-
-
-

8. - Image object and its members -

-
Issue 361: Ability to define platform-specific icon/splash style

Different platforms have different guidelines regarding their icon style, and an app may need to follow them to look "native".

- -

Wrong style, even if slightly wrong, can create "uncanny valley", e.g. multiple OSes use square icons with rounded corners, but with a different radius. Icon with a wrong corner radius looks like an amateurish knock-off.

- -

The spec currently only matches icons on size and density, but not style/theme, so sites would need to resort to user-agent sniffing if they wanted to serve system-specific icons.

- -

A theoretical solution available in HTML, with help of Moz's nonstandard media queries, is:

- -
<link rel=icon href="generic.png">
-<link rel=icon media="all and -moz-windows-theme:luna-blue" href="xp.png">
-<link rel=icon media="all and -moz-windows-theme:aero" href="vista.png">
-
-

- Each image object - represents an image that is used as part of a web application, suitable - to use in various contexts depending on the semantics of the member - that is using the object (e.g., an icon that is part of an application - menu, a splashscreen, etc.). For an image object, this specification - provides developers with a means of specifying the dimensions, optimal - pixel density, and media type of an image (i.e., a "responsive image" - solution [respimg-usecases]). A user agent can use these values to - select an image that is best suited to display on the end-user's device - or most closely matches the end-user's preferences. -

-
-

8.1 - Content security policy of image objects -

-

- Whether a user agent can - fetch an icon image is governed by the img-src directive - [CSP3] associated with the manifest's owner Document. -

-
-

- For example, given the following img-src directive in - the Content-Security-Policy HTTP header of the - manifest's owner Document: -

-
HTTP/1.1 200 OK
-Content-Type: text/html
-Content-Security-Policy: img-src icons.example.com
-
-<!doctype>
-<html>
-<link rel="manifest" href="manifest.webmanifest">
-
-
-

- And given the following manifest.webmanifest: -

-
{
-    "name": "custom manifest",
-    "start_url": "http://boo",
-    "icons": [{
-        "src": "//icons.example.com/lowres"
-      },
-      {
-        "src": "//other.com/hi-res"
-      }]
-}
-

- The fetching of icon resources from - icons.example.com/lowres would succeed, while fetching - from other.com/hi-res would fail. -

-
-
-
-

8.2 - density member -

-

- The density member of an image object - is the device pixel density for which this image was designed. The - device pixel density is expressed as the number of dots per 'px' unit - (equivalent to a dppx as defined in [css3-values]). The value is a - positive number greater than 0. If the developer omits the value, the - user agent assumes the value 1.0. -

-

- The steps for processing a density member of an - image are given by the following algorithm. The algorithm takes - an image object image as an argument and returns a - positive number. -

-
    -
  1. If [[HasOwnProperty]] internal method of image - passing density as the argument returns - false: -
      -
    1. Return 1.0. -
    2. -
    -
  2. -
  3. Let value be the result of calling the - [[GetOwnProperty]] internal method of image passing " - density" as the argument. -
  4. -
  5. Let result be the result of parseFloat( - value). -
  6. -
  7. If result is NaN, +∞, or less than or - equal to +0, then: -
      -
    1. - Issue a developer warning. -
    2. -
    3. Return 1.0. -
    4. -
    -
  8. -
  9. Return result. -
  10. -
-
-
-

8.3 - sizes member -

-

- The sizes member of an image object is a - string consisting of an unordered set of unique - space-separated tokens which are ASCII case-insensitive - that represents the dimensions of an image. Each keyword is either an - ASCII case-insensitive match for the string "any", or a - value that consists of two valid non-negative integers that do - not have a leading U+0030 DIGIT ZERO (0) character and that are - separated by a single U+0078 LATIN SMALL LETTER X or U+0058 LATIN - CAPITAL LETTER X character. The keywords represent icon sizes in raw - pixels (as opposed to CSS pixels). When multiple image objects - are available, a user agent MAY use the value to decide which icon is - most suitable for a display context (and ignore any that are - inappropriate). -

-

- The steps for processing a sizes member of an - image are given by the following algorithm. The algorithm takes - an image object image. This algorithm will return a - set or undefined. -

-
    -
  1. Let sizes be an empty set. -
  2. -
  3. Let value be the result of calling the - [[GetOwnProperty]] internal method of image - passing " sizes" as the argument. -
  4. -
  5. Let type be Type(value). -
  6. -
  7. If type is not "string", then: -
      -
    1. If type is not "undefined", issue - a developer warning that the type is unsupported. -
    2. -
    3. Return undefined. -
    4. -
    -
  8. -
  9. Otherwise, parse value as if it was a [HTML] - sizes attribute and let keywords be the - result. -
  10. -
  11. For each keyword in keywords: -
      -
    1. Convert keyword to ASCII lowercase and add - the resulting string to sizes. -
    2. -
    -
  12. -
  13. Return sizes. -
  14. -
-
-
-

8.4 - src member -

-

- The src member of an - image object is a URL from which a user agent can fetch - the image's data. -

-

- The steps for processing the src member of an - image are given by the following algorithm. The algorithm takes - an image object image, and a URL manifest - URL, which is the URL from which the - manifest was fetched. This algorithm will return a - URL or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of image - passing " src" as the argument. -
  2. -
  3. Let type be Type(value). -
  4. -
  5. If type is not "string", then: -
      -
    1. If type is not "undefined", issue a - developer warning that the type is unsupported. -
    2. -
    3. Return undefined. -
    4. -
    -
  6. -
  7. If Trim(value) is the empty string, then return - undefined. -
  8. -
  9. Otherwise, parse value using manifest - URL as the base URL and return the result. -
  10. -
-
-
-

8.5 - type member -

-

- The type member of an image object is - a hint as to the media type of the image. The purpose of this member - is to allow a user agent to ignore images of media types it does not - support. -

-

- There is no default MIME type for image objects. However, for the - purposes of determining the type of the resource, user agents - must expect the resource to be an image. -

-

- The steps for processing the type member of an - image are given by the following algorithm. The algorithm takes - an image object potential image as an argument, and returns either a - string or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of potential - image passing "type" as the argument. -
  2. -
  3. Let type be Type(value). -
  4. -
  5. If type is not "string", then: -
      -
    1. If type is not "undefined", issue a - developer warning that the type is unsupported. -
    2. -
    3. Return undefined. -
    4. -
    -
  6. -
  7. Trim(value) and set value to be the resulting - string. -
  8. -
  9. If value is not a valid MIME type or the value - of type is not a supported media format, issue a developer - warning and return undefined. -
  10. -
  11. Return value. -
  12. -
-
-
-

8.6 - Processing an array of images -

-

- The steps for processing an array of images are given by - the following algorithm. The algorithm takes a manifest, a URL - manifest URL, which is the URL from which the - manifest was fetched, and a string that represents the - member name of the member which contains the array of - image objects. This algorithm returns a list of image - objects, which can be empty. -

-
    -
  1. Let images be an empty list. -
  2. -
  3. Let unprocessed images be the result of calling the - [[GetOwnProperty]] internal method of manifest - with member name as the argument. -
  4. -
  5. If unprocessed images is an array, then: -
      -
    1. From unprocessed images, filter out any item where - HasOwnProperty(item,"src") returns false. -
    2. -
    3. For each potential image in the array: -
        -
      1. Let src be the result of running the steps - for processing the src member of an image - with potential image and manifest URL. -
      2. -
      3. If src is undefined, move on to - the next item in unprocessed images (if any are left). -
      4. -
      5. Otherwise, let image be an object with - properties src, type, - sizes, and density. All properties - initially set to undefined. -
      6. -
      7. Set image's src property to be - src. -
      8. -
      9. Let type be the result of running the steps - for processing the type member of an image - passing potential image. -
      10. -
      11. If type is not undefined, set - image's type property to be - type. -
      12. -
      13. Let sizes be the list that results from running - the steps for processing a sizes member of an - image passing potential image. -
      14. -
      15. If sizes is not undefined, set - image's sizes property to be - sizes. -
      16. -
      17. Let density be the result of running the - steps for processing a density member of an - image passing potential - image. -
      18. -
      19. If density is not undefined, set - image's density property to be - density. -
      20. -
      21. Append image to images. -
      22. -
      -
    4. -
    -
  6. -
  7. Otherwise, if unprocessed images is not - undefined: -
      -
    1. - Issue a developer warning that the type is not - supported. -
    2. -
    -
  8. -
  9. Return images. -
  10. -
-
-
-
-

9. - Application object and its members -

-

- Each application object represents an application related to - the web application. An application object has three properties: a - platform which represents the platform it is associated - with, a url which represents the URL where the application - can be found, and an id which can be used as information - additional to the URL or instead of the URL, depending on the platform. - A valid application object MUST have a platform and - either a url or an id (or both). -

-
-

- In the following example, the web application is listing two - different related applications, one on Google Play Store and the - other one on the iTunes Store: -

-
{
-  "related_applications": [
-      {
-        "platform": "play",
-        "url": "https://play.google.com/store/apps/details?id=com.example.app1",
-        "id": "com.example.app1"
-      }, {
-        "platform": "itunes",
-        "url": "https://itunes.apple.com/app/example-app1/id123456789"
-      }]
- }
-
-
-
Issue

- Where should the platform expected value be listed? -

-
-

9.1 - platform member -

-

- The platform member of an - application object represents the platform on which the application - can be found. -

-

- The steps for processing the platform member of an - application are given by the following algorithm. The algorithm - takes an application object application. This - algorithm will return a string or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of application - passing "platform" as the argument. -
  2. -
  3. If Type(value) is not "string": -
      -
    1. If Type(value) is not "undefined", - optionally issue a developer warning that the type is not - supported. -
    2. -
    3. Return undefined. -
    4. -
    -
  4. -
  5. Otherwise, Trim(value) and return the result. -
  6. -
-
-
-

9.2 - url member -

-

- The url member of an application - object represents the URL at which the application can be - found. -

-

- The steps for processing the url member of an - application are given by the following algorithm. The algorithm - takes an application object application. This - algorithm will return a URL or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of application - passing "url" as the argument. -
  2. -
  3. If Type(value) is not "string": -
      -
    1. If Type(value) is not "undefined", - optionally issue a developer warning that the type is not - supported. -
    2. -
    3. Return undefined. -
    4. -
    -
  4. -
  5. Trim(value) and set value to be the resulting - string. -
  6. -
  7. Parse value and, if the result is not - failure, return the result; otherwise, return undefined. -
  8. -
-
-
-

9.3 - id member -

-

- The id member of an application - object represents the id which is used to represent the application - on the platform. -

-

- The steps for processing the id member of an - application are given by the following algorithm. The algorithm - takes an application object application. This - algorithm will return a string or undefined. -

-
    -
  1. Let value be the result of calling the - [[GetOwnProperty]] internal method of application - passing "id" as the argument. -
  2. -
  3. If Type(value) is not "string": -
      -
    1. If Type(value) is not "undefined", - optionally issue a developer warning that the type is not - supported. -
    2. -
    3. Return undefined. -
    4. -
    -
  4. -
  5. Otherwise, Trim(value) and return the result. -
  6. -
-
-
-
-

10. - Common conventions and dependencies -

-

- The - [[GetOwnProperty]] operation, the abstract operation - - hasOwnProperty, the - parseFloat(string) function, and the - Type(x) notation are defined in - [ECMASCRIPT]. -

-

- When instructed to Trim(x), a user agent MUST - behave as if [ECMASCRIPT]'s - String.prototype.trim() function had been called on the string - x. -

-

- As the manifest uses the JSON format, this specification relies on the - types defined in [ECMA-404] specification: namely object, - array, number, string, - true, false, and null. Strict - type checking is not enforced by this specification. Instead, each - member's definition specifies the steps required to process a - particular member and what to do when a type does not match what is - expected. -

-

- The URL concept and - URL parser - are defined in [WHATWG-URL]. -

-

- The - default orientation concept and the - OrientationLockType enum, are defined in - [SCREEN-ORIENTATION]. -

-

- The algorithm to parse - a component value is defined in [CSS-SYNTAX-3]. -

-

- The - manifest-src, - img-src, and - default-src directives are defined in - [CSP3]. -

-

- The IsStructurallyValidLanguageTag - and CanonicalizeLanguageTag - abstract operations are defined in [ECMAS-402]. -

-

- The following are defined in [FETCH]: -

- -

- The following are defined in [HTML]: -

- -
-
-

11. - IANA considerations -

-

- The following registrations are for community review and will be - submitted to the IESG for - review, approval, and registration with IANA. -

-
-

11.1 - Media type registration -

-

- This section contains the required text for MIME media type - registration with IANA. -

-

- The media type for a manifest is - application/manifest+json. -

-

- If the protocol over which the manifest is transferred supports the - [MIME-TYPES] specification (e.g. HTTP), it is RECOMMENDED that the - manifest be labeled with the media type for a manifest. -

-
-
- Type name: -
-
- application -
-
- Subtype name: -
-
- manifest+json -
-
- Required parameters: -
-
- N/A -
-
- Optional parameters: -
-
- N/A -
-
- Encoding considerations: -
-
- Same as for application/json -
-
- Security and privacy considerations: -
-
-
Issue 348: Triage privacy issues

Lots of feedback here...
-https://lists.w3.org/Archives/Public/public-privacy/2015JanMar/0118.html

- -

cc @npdoty, who we need to work with to address the above...

-

- This specification does not directly deal with high-value data. - However, installed web applications and their data could - be seen as "high value" (particularly from a privacy - perspective). -

-

- As the manifest format is JSON and will commonly be encoded using - [UNICODE], the security considerations described in - [ECMA-404] and [UNICODE-SECURITY] apply. In addition, - because there is no way to prevent developers from including - custom/unrestrained data in a manifest, implementors need - to impose their own implementation-specific limits on the values - of otherwise unconstrained member types, e.g. to prevent denial - of service attacks, to guard against running out of memory, or to - work around platform-specific limitations. -

-

- Web applications will generally contain ECMAScript, HTML, CSS - files, and other media, which are executed in a sand-boxed - environment. As such, implementors need to be aware of the - security implications for the types they support. Specifically, - implementors need to consider the security implications outlined - in at least the following specifications: [CSS-MIME], - [ECMAScript-MIME], [HTML]. -

-

- As web applications can contain content that is able to - simultaneously interact with the local device and a remote host, - implementors need to consider the privacy implications resulting - from exposing private information to a remote host. Mitigation - and in-depth defensive measures are an implementation - responsibility and not prescribed by this specification. However, - in designing these measures, implementors are advised to enable - user awareness of information sharing, and to provide easy access - to interfaces that enable revocation of permissions. -

-

- As this specification allows for the declaration of URLs within - certain members of a manifest, implementors need to consider the - security considerations discussed in the [WHATWG-URL] - specification. Implementations intending to display IRIs and - IDNA addresses - found in the manifest are strongly encouraged to follow the - security advice given in [UNICODE-SECURITY]. -

-

- Developers need to be aware of the security considerations - discussed throughout the [CSP3] specification, particularly in - relation to making data: a valid source for the - purpose of inlining a manifest. Doing so can enable XSS - attacks by allowing a manifest to be included directly in the - document itself; this is best avoided completely. -

-
-
- Applications that use this media type: -
-
- Web browsers -
-
- Additional information: -
-
-
-
- Magic number(s): -
-
- N/A -
-
- File extension(s): -
-
- .webmanifest -
-
- Macintosh file type code(s): -
-
- TEXT -
-
-
-
- Person & email address to contact for further information: -
-
- The Web - Platform Working Group can be contacted at - public-webapps@w3.org. -
-
- Intended usage: -
-
- COMMON -
-
- Restrictions on usage: -
-
- none -
-
- Author: -
-
- W3C's Web Platform Working Group. -
-
- Change controller: -
-
- W3C. -
-
-
- -
-

12. Conformance

-

- As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, - and notes in this specification are non-normative. Everything else in this specification is - normative. -

-

The key words MAY, MUST, MUST NOT, OPTIONAL, RECOMMENDED, REQUIRED, and SHOULD are - to be interpreted as described in [RFC2119]. -

- -

- There is only one class of product that can claim conformance to this - specification: a user agent. -

-
Note

- Although this specification is primarily targeted at web browsers, it - is feasible that other software could also implement this specification - in a conforming manner. For instance, search engines, or crawlers, - could find and process manifests to build up catalogs of sites that - potentially work as installable web applications. -

-
-

12.1 - Extensibility -

This section is non-normative.

-

- This specification is designed to be extensible. Other specifications - are encouraged to define new members for the manifest. However, in - doing so, please follow the conventions used in this specification. - In particular, use the extension point to hook into the - steps for processing a manifest. Also, be sure to specify the - steps for processing your particular member in the manner set forth - in this specification. This will help keep this part of the platform - consistent. -

-

- When specifying a new member, don't override or monkey patch - anything defined in this specification. Also, don't assume your - member will be processed before or after any other member. Keep your - new member, and its processing, atomic and self contained. Note also - that implementations are free to ignore any member they do not - recognize or support. -

-

- If you are writing a specification and temporarily want to patch this - specification to help implementations along, file a bug so the - community is informed of what you are trying to do. -

-
-

12.1.1 - Proprietary manifest members -

This section is non-normative.

-

- Although proprietary extensions are undesirable, they can't - realistically be avoided. As such, the RECOMMENDED way to add a new - proprietary manifest member as an extension is to use a vendor - prefix. -

-

- The following is an example of two hypothetical vendor extensions. -

-
Example 6: vendor extensions
{
-  ...
-  "webkit_fancy_feature": "some/url/img",
-  "moz_awesome_thing": { ... }
-  ...
-}
-
-
-
- -
-

B. - JSON Schema -

-

- Developers interested in validating manifest documents can find - an unofficial JSON - schema for the manifest format at schemastore.org. It is licensed under - Apache - 2.0. It is kindly maintained by Mads Kristensen. If you find - any issues with the JSON schema, please file a bug at - the SchemaStore - repository on GitHub. -

-
-
-

C. - Internationalization -

-
Issue 323: Add note about i18n

The note should include:

- -
    -
  • - The spec should detail how we expect developers to localize content. Like by: <link rel=manifest href='manifest?lang=en'> -
  • -
  • - An example showing the use of a language other than English.
  • -
  • - Add discussion of obtaining a correctly localized reference, particularly for use in cases where the source page itself has used language negotiation.
  • -
-
-

D. Issue Summary

  • Issue 363: Deep linking - need proper use cases and requirements analysis
  • Issue 384: Updating is under/incorrectly specified
  • Issue 380: Provide better examples of scope
  • Issue 372: A means to control how long a splash screen should remain on screen.
  • Issue 365: Is prefer_related_applications too simplistic?
  • Issue 361: Ability to define platform-specific icon/splash style
  • Issue 348: Triage privacy issues
  • Issue 323: Add note about i18n
-
-

E. - Acknowledgments -

-

- This document reuses text from the [HTML] specification, edited by - Ian Hickson, as permitted by the license of that specification. -

-

Dave Raggett and Dominique Hazael-Massieux contributed to this specification via the HTML5Apps project.

-
- - -

F. References

F.1 Normative references

[BCP47]
A. Phillips; M. Davis. IETF. Tags for Identifying Languages. September 2009. IETF Best Current Practice. URL: https://tools.ietf.org/html/bcp47 -
[CORS]
Anne van Kesteren. W3C. Cross-Origin Resource Sharing. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/cors/ -
[CSP3]
Mike West. W3C. Content Security Policy Level 3. 26 January 2016. W3C Working Draft. URL: http://www.w3.org/TR/CSP3/ -
[CSS-MIME]
H. Lie; B. Bos; C. Lilley. IETF. The text/css Media Type. March 1998. Informational. URL: https://tools.ietf.org/html/rfc2318 -
[CSS-SYNTAX-3]
Tab Atkins Jr.; Simon Sapin. W3C. CSS Syntax Module Level 3. 20 February 2014. W3C Candidate Recommendation. URL: http://www.w3.org/TR/css-syntax-3/ -
[ECMA-404]
Ecma International. The JSON Data Interchange Format. 1 October 2013. Standard. URL: http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf -
[ECMAS-402]
Ecma International. ECMAScript Internationalization API Specification. URL: https://tc39.github.io/ecma402/ -
[ECMASCRIPT]
Ecma International. ECMAScript Language Specification. URL: https://tc39.github.io/ecma262/ -
[ECMAScript-MIME]
B. Hoehrmann. IETF. Scripting Media Types. April 2006. Informational. URL: https://tools.ietf.org/html/rfc4329 -
[FETCH]
Anne van Kesteren. WHATWG. Fetch Standard. Living Standard. URL: https://fetch.spec.whatwg.org/ -
[HTML]
Ian Hickson. WHATWG. HTML Standard. Living Standard. URL: https://html.spec.whatwg.org/multipage/ -
[MEDIAQ]
Florian Rivoal et al. W3C. Media Queries. 19 June 2012. W3C Recommendation. URL: http://www.w3.org/TR/css3-mediaqueries/ -
[META-THEME-COLOR]
WHATWG. The 'theme-color' meta extension. Living Standard. URL: https://github.com/whatwg/meta-theme-color -
[MIME-TYPES]
N. Freed; N. Borenstein. IETF. Multipurpose Internet Mail Extensions (MIME) Part Two: Media Types. November 1996. Draft Standard. URL: https://tools.ietf.org/html/rfc2046 -
[RFC2119]
S. Bradner. IETF. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 -
[RFC5646]
A. Phillips, Ed.; M. Davis, Ed.. IETF. Tags for Identifying Languages. September 2009. Best Current Practice. URL: https://tools.ietf.org/html/rfc5646 -
[SCREEN-ORIENTATION]
Mounir Lamouri; Marcos Caceres. W3C. The Screen Orientation API. 23 December 2015. W3C Working Draft. URL: http://www.w3.org/TR/screen-orientation/ -
[UNICODE]
The Unicode Consortium. The Unicode Standard. URL: http://www.unicode.org/versions/latest/ -
[UNICODE-SECURITY]
Mark Davis; Michel Suignard. Unicode Consortium. Unicode Security Considerations. URL: http://www.unicode.org/reports/tr36/ -
[URL]
Anne van Kesteren; Sam Ruby. W3C. URL. 9 December 2014. W3C Working Draft. URL: http://www.w3.org/TR/url-1/ -
[WHATWG-URL]
Anne van Kesteren; Sam Ruby. WHATWG. URL Standard. Living Standard. URL: https://url.spec.whatwg.org/ -

F.2 Informative references

[WHATWG-FULLSCREEN]
Anne van Kesteren. WHATWG. Fullscreen API Standard. Living Standard. URL: https://fullscreen.spec.whatwg.org/ -
[css3-values]
Tab Atkins Jr.; Elika Etemad. W3C. CSS Values and Units Module Level 3. 11 June 2015. W3C Candidate Recommendation. URL: http://www.w3.org/TR/css-values/ -
[cssom-view]
Simon Pieters; Glenn Adams. W3C. CSSOM View Module. 17 December 2013. W3C Working Draft. URL: http://www.w3.org/TR/cssom-view/ -
[respimg-usecases]
Marcos Caceres; Mathew Marquis; Yoav Weiss; David Newton. W3C. Use Cases and Requirements for Standardizing Responsive Images. 7 November 2013. W3C Note. URL: http://www.w3.org/TR/respimg-usecases/ -
diff --git a/test/samples.json b/test/samples.json index 2e2d27377..2919e1e1f 100644 --- a/test/samples.json +++ b/test/samples.json @@ -1,16 +1,5 @@ [ - { - "url": "https://www.w3.org/TR/2016/WD-appmanifest-20160312/" - , "file": "appmanifest" - , "profile": "WD" - , "deliverers": [ - { - "name": "Web Platform Working Group" - , "homepage": "http://www.w3.org/WebPlatform/WG/" - } - ] - } -, { + { "url": "https://www.w3.org/TR/2016/CR-WebIDL-1-20160308/" , "file": "WebIDL-1" , "profile": "CR" From 654e4102f7e2f71e1808ca793800698180359d59 Mon Sep 17 00:00:00 2001 From: tripu Date: Sun, 20 Mar 2016 13:47:29 +0900 Subject: [PATCH 15/23] Simplify samples for metadata: remove content --- test/docs/metadata/WebIDL-1.html | 13493 +----------------- test/docs/metadata/csvw-ucr.html | 3906 +---- test/docs/metadata/mediacapture-depth.html | 655 +- test/docs/metadata/tabular-data-model.html | 2352 +-- test/docs/metadata/tracking-compliance.html | 698 +- test/docs/metadata/ttml-imsc1.html | 3620 +---- 6 files changed, 19 insertions(+), 24705 deletions(-) diff --git a/test/docs/metadata/WebIDL-1.html b/test/docs/metadata/WebIDL-1.html index 4c4d952ce..b3ff88434 100644 --- a/test/docs/metadata/WebIDL-1.html +++ b/test/docs/metadata/WebIDL-1.html @@ -369,13493 +369,8 @@

Table of Contents

- -
-

1. Introduction

- -

This section is informative.

- -

- Technical reports published by the W3C that include programming - language interfaces have typically been described using the - Object Management Group’s Interface Definition Language (IDL) - [OMGIDL]. The IDL provides a means to - describe these interfaces in a language independent manner. Usually, - additional language binding appendices are included in such - documents which detail how the interfaces described with the IDL - correspond to constructs in the given language. -

-

- However, the bindings in these specifications for the language most - commonly used on the web, ECMAScript, are consistently specified with - low enough precision as to result in interoperability issues. In - addition, each specification must describe the same basic information, - such as DOM interfaces described in IDL corresponding to properties - on the ECMAScript global object, or the unsigned - long IDL type mapping to the Number - type in ECMAScript. -

-

- This specification defines an IDL language similar to OMG IDL - for use by specifications that define interfaces for Web APIs. A number of extensions are - given to the IDL to support common functionality that previously must - have been written in prose. In addition, precise language bindings - for ECMAScript Edition 6 are given. -

- -
-

1.1 Typographic conventions

- -

- The following typographic conventions are used in this document: -

-
    -
  • Defining instances of terms: example term
  • -
  • Links to terms defined in this document: example term
  • -
  • Links to terms defined in other documents: example term
  • -
  • Grammar symbols: ExampleGrammarSymbol
  • -
  • IDL and ECMAScript types: ExampleType
  • -
  • Code snippets: a = b + obj.f()
  • -
  • Unicode characters: U+0030 DIGIT ZERO ("0")
  • -
  • Extended attributes: [ExampleExtendedAttribute]
  • -
  • Variable names in prose and algorithms: exampleVariableName.
  • -
  • IDL informal syntax examples: -
    interface identifier {
    -  interface-members…
    -};
    - (Red text is used to highlight specific parts of the syntax discussed in surrounding prose.)
  • -
  • IDL grammar snippets: - - -
    [5]ExampleGrammarSymbolOtherSymbol "sometoken"
     | AnotherSymbol
     | ε  // nothing
    - (Each grammar rule is assigned a number for reference, shown on the left.)
  • -
  • Non-normative notes:
    Note

    This is a note.

  • -
  • Non-normative examples:
    Example

    This is an example.

  • -
  • Normative warnings:
    Warning

    This is a warning.

  • -
  • Code blocks:
    IDL
    // This is an IDL code block.
    -interface Example {
    -  attribute long something;
    -};
    -
    ECMAScript
    // This is an ECMAScript code block.
    -window.onload = function() { window.alert("loaded"); };
  • -
-
-
- -

2. Conformance

-

- As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, - and notes in this specification are non-normative. Everything else in this specification is - normative. -

-

The key words MAY, MUST, MUST NOT, REQUIRED, SHALL, SHOULD, and SHOULD NOT are - to be interpreted as described in [RFC2119]. -

- - - - - -

- The following conformance classes are defined by this specification: -

-
-
conforming set of IDL fragments
-
-

- A set of IDL fragments is considered - to be a conforming - set of IDL fragments if, taken together, they satisfy all of the - MUST-, - REQUIRED- and SHALL-level - criteria in this specification that apply to IDL fragments. -

-
-
conforming implementation
-
-

- A user agent is considered to be a - conforming implementation - relative to a conforming - set of IDL fragments if it satisfies all of the MUST-, - REQUIRED- and SHALL-level - criteria in this specification that apply to implementations for all language - bindings that the user agent supports. -

-
-
conforming ECMAScript implementation
-
-

- A user agent is considered to be a - conforming ECMAScript implementation - relative to a conforming - set of IDL fragments if it satisfies all of the MUST-, - REQUIRED- and SHALL-level - criteria in this specification that apply to implementations for the ECMAScript - language binding. -

-
-
-
- -
-

3. Interface definition language

- -

- This section describes a language, Web IDL, which can be used to define - interfaces for APIs in the Web platform. A specification that defines Web APIs - can include one or more IDL fragments that - describe the interfaces (the state and behavior that objects can exhibit) - for the APIs defined by that specification. - An IDL fragment is - a sequence of definitions that matches the Definitions grammar symbol. - The set of IDL fragments that - an implementation supports is not ordered. - See Appendix A. for the complete grammar and an explanation of the notation used. -

- -

- The different kinds of definitions that can appear in an - IDL fragment are: - interfaces, - partial interface definitions, - dictionaries, - partial dictionary definitions, - typedefs and - implements statements. - These are all defined in the following sections. -

- -

- Each definition - (matching Definition) - can be preceded by a list of extended attributes (matching - ExtendedAttributeList), - which can control how the definition will be handled in language bindings. - The extended attributes defined by this specification that are language binding - agnostic are discussed in section 3.11 , - while those specific to the ECMAScript language binding are discussed - in section 4.3 . -

- -
[extended-attributes]
-interface identifier {
-  interface-members…
-};
- -
[1]DefinitionsExtendedAttributeList Definition Definitions
 | - ε
[2]DefinitionCallbackOrInterface
 | - Partial
 | - Dictionary
 | - Enum
 | - Typedef
 | - ImplementsStatement
[3]CallbackOrInterface"callback" CallbackRestOrInterface
 | - Interface
- -
Example
-

- The following is an example of an IDL fragment. -

-
IDL
interface Paint { };
-
-interface SolidColor : Paint {
-  attribute double red;
-  attribute double green;
-  attribute double blue;
-};
-
-interface Pattern : Paint {
-  attribute DOMString imageURL;
-};
-
-[Constructor]
-interface GraphicalWindow {
-  readonly attribute unsigned long width;
-  readonly attribute unsigned long height;
-
-  attribute Paint currentPaint;
-
-  void drawRectangle(double x, double y, double width, double height);
-
-  void drawText(double x, double y, DOMString text);
-};
-

- Here, four interfaces - are being defined. - The GraphicalWindow interface has two - read only attributes, - one writable attribute, and two operations - defined on it. Objects that implement the GraphicalWindow interface - will expose these attributes and operations in a manner appropriate to the - particular language being used. -

-

- In ECMAScript, the attributes on the IDL interfaces will be exposed as accessor - properties and the operations as Function-valued - data properties on a prototype object for all GraphicalWindow - objects; each ECMAScript object that implements GraphicalWindow - will have that prototype object in its prototype chain. -

- -

- The [Constructor] that appears on GraphicalWindow - is an extended attribute. - This extended attribute causes a constructor to exist in ECMAScript implementations, - so that calling new GraphicalWindow() would return a new object - that implemented the interface. -

-
- -
-

3.1 Names

- -

- Every interface, - partial interface definition, - dictionary, - partial dictionary definition, - enumeration, - callback function and - typedef (together called named definitions) - and every constant, - attribute, - and dictionary member has an - identifier, as do some - operations. - The identifier is determined by an - identifier token somewhere - in the declaration: -

-
    -
  • - For named definitions, - the identifier token that appears - directly after the interface, - dictionary, enum - or callback keyword - determines the identifier of that definition. -
    interface interface-identifier { interface-members… };
    -partial interface interface-identifier { interface-members… };
    -dictionary dictionary-identifier { dictionary-members… };
    -partial dictionary dictionary-identifier { dictionary-members… };
    -enum enumeration-identifier { enumeration-values… };
    -callback callback-identifier = callback-signature;
    -
  • -
  • - For attributes, - typedefs - and dictionary members, - the final identifier token before the - semicolon at the end of the declaration determines the identifier. -
    interface identifier {
    -  attribute type attribute-identifier;
    -};
    -
    -typedef type typedef-identifier;
    -
    -dictionary identifier {
    -  type dictionary-member-identifier;
    -};
    -
  • -
  • - For constants, - the identifier token before the - equals sign determines the identifier. -
    const type constant-identifier = value;
    -
  • -
  • - For operations, the - identifier token that appears - after the return type but before the opening parenthesis (that is, - one that is matched as part of the OptionalIdentifier - grammar symbol in an OperationRest) determines the identifier of the operation. If - there is no such identifier token, - then the operation does not have an identifier. -
    return-type operation-identifier(arguments…);
    -
  • -
-
Note
-

- Operations can have no identifier when they are being used to declare a - special kind of operation, such as a getter or setter. -

-
-

- For all of these constructs, the identifier - is the value of the identifier token with any leading - U+005F LOW LINE ("_") character (underscore) removed. -

-
Note
-

- A leading "_" is used to escape an identifier from looking - like a reserved word so that, for example, an interface named “interface” can be - defined. The leading "_" is dropped to unescape the - identifier. -

-
-

- Operation arguments can take a slightly wider set of identifiers. In an operation - declaration, the identifier of an argument is specified immediately after its - type and is given by either an identifier - token or by one of the keywords that match the ArgumentNameKeyword - symbol. If one of these keywords is used, it need not be escaped with a leading - underscore. -

-
return-type operation-identifier(argument-type argument-identifier, …);
-
[71]ArgumentNameKeyword - "attribute"
 | - "callback"
 | - "const"
 | - "deleter"
 | - "dictionary" -
 | - "enum"
 | - "getter"
 | - "implements"
 | - "inherit"
 | - "interface"
 | - "iterable" -
 | - "legacycaller"
 | - "partial"
 | - "required"
 | - "serializer"
 | - "setter"
 | - "static"
 | - "stringifier"
 | - "typedef" -
 | - "unrestricted" -
-

- If an identifier token is used, then the - identifier of the operation argument - is the value of that token with any leading - U+005F LOW LINE ("_") character (underscore) removed. - If instead one of the ArgumentNameKeyword - keyword tokens is used, then the identifier of the operation argument - is simply that token. -

-

- The identifier of any of the abovementioned - IDL constructs MUST NOT be “constructor”, - “toString”, “toJSON”, - or begin with a U+005F LOW LINE ("_") character. These - are known as reserved identifiers. -

-
Note
-

Further restrictions on identifier names for particular constructs may be made - in later sections.

-
-

- Within the set of IDL fragments - that a given implementation supports, - the identifier of every - interface, - dictionary, - enumeration, - callback function and - typedef - MUST NOT - be the same as the identifier of any other - interface, - dictionary, - enumeration, - callback function or - typedef. -

-

- Within an IDL fragment, a reference - to a definition need not appear after - the declaration of the referenced definition. References can also be made - across IDL fragments. -

-
Example
-

Therefore, the following IDL fragment is valid:

-
IDL
interface B : A {
-  void f(SequenceOfLongs x);
-};
-
-interface A {
-};
-
-typedef sequence<long> SequenceOfLongs;
-
- -
Example
-

- The following IDL fragment - demonstrates how identifiers - are given to definitions and interface members. -

-
IDL
// Typedef identifier: "number"
-typedef double number;
-
-// Interface identifier: "System"
-interface System {
-
-  // Operation identifier:          "createObject"
-  // Operation argument identifier: "interface"
-  object createObject(DOMString _interface);
-
-  // Operation argument identifier: "interface"
-  sequence<object> getObjects(DOMString interface);
-
-  // Operation has no identifier; it declares a getter.
-  getter DOMString (DOMString keyName);
-};
-
-// Interface identifier: "TextField"
-interface TextField {
-
-  // Attribute identifier: "const"
-  attribute boolean _const;
-
-  // Attribute identifier: "value"
-  attribute DOMString? _value;
-};
-

- Note that while the second attribute - on the TextField interface - need not have been escaped with an underscore (because “value” is - not a keyword in the IDL grammar), it is still unescaped - to obtain the attribute’s identifier. -

-
-
- -
-

3.2 Interfaces

- -

- IDL fragments are used to - describe object oriented systems. In such systems, objects are entities - that have identity and which are encapsulations of state and behavior. - An interface is a definition (matching - Interface or - "callback" Interface) that declares some - state and behavior that an object implementing that interface will expose. -

-
interface identifier {
-  interface-members…
-};
-

- An interface is a specification of a set of - interface members - (matching InterfaceMembers), - which are the constants, - attributes, - operations and - other declarations that appear between the braces in the interface declaration. - Attributes describe the state that an object - implementing the interface will expose, and operations describe the - behaviors that can be invoked on the object. Constants declare - named constant values that are exposed as a convenience to users - of objects in the system. -

-

- Interfaces in Web IDL describe how objects that implement the - interface behave. In bindings for object oriented languages, it is - expected that an object that implements a particular IDL interface - provides ways to inspect and modify the object's state and to - invoke the behavior described by the interface. -

- -

- An interface can be defined to inherit from another interface. - If the identifier of the interface is followed by a - U+003A COLON (":") character - and an identifier, - then that identifier identifies the inherited interface. - An object that implements an interface that inherits from another - also implements that inherited interface. The object therefore will also - have members that correspond to the interface members from the inherited interface. -

-
interface identifier : identifier-of-inherited-interface {
-  interface-members…
-};
-

- The order that members appear in has no significance except in the - case of overloading. -

-

- Interfaces may specify an interface member that has the same name as - one from an inherited interface. Objects that implement the derived - interface will expose the member on the derived interface. It is - language binding specific whether the overridden member can be - accessed on the object. -

-
Example
-

- Consider the following two interfaces. -

-
IDL
interface A {
-  void f();
-  void g();
-};
-
-interface B : A {
-  void f();
-  void g(DOMString x);
-};
-

- In the ECMAScript language binding, an instance of B - will have a prototype chain that looks like the following: -

-
  [Object.prototype: the Object prototype object]
-       ↑
-  [A.prototype: interface prototype object for A]
-       ↑
-  [B.prototype: interface prototype object for B]
-       ↑
-  [instanceOfB]
-

- Calling instanceOfB.f() in ECMAScript will invoke the f defined - on B. However, the f from A - can still be invoked on an object that implements B by - calling A.prototype.f.call(instanceOfB). -

- -
-

- The inherited interfaces of - a given interface A is the set of all interfaces that A - inherits from, directly or indirectly. If A does not inherit - from another interface, then the set is empty. Otherwise, the set - includes the interface B that A inherits - from and all of B’s inherited interfaces. -

-

- An interface MUST NOT be declared such that - its inheritance hierarchy has a cycle. That is, an interface - A cannot inherit from itself, nor can it inherit from another - interface B that inherits from A, and so on. -

-

- Note that general multiple inheritance of interfaces is not supported, and - objects also cannot implement arbitrary sets of interfaces. - Objects can be defined to implement a single given interface A, - which means that it also implements all of A’s - inherited interfaces. In addition, - an implements statement can be - used to define that objects implementing an interface will always - also implement another interface. -

-

- Each interface member can be preceded by a list of extended attributes (matching - ExtendedAttributeList), - which can control how the interface member will be handled in language bindings. -

-
interface identifier {
-
-  [extended-attributes]
-  const type identifier = value;
-
-  [extended-attributes]
-  attribute type identifier;
-
-  [extended-attributes]
-  return-type identifier(arguments…);
-};
- -

- A callback interface is - an interface - that uses the callback keyword at the start of - its definition. Callback interfaces are ones that can be - implemented by user objects - and not by platform objects, - as described in section 3.9 - . -

-
callback interface identifier {
-  interface-members…
-};
-
Note
-

See also the similarly named callback function definition.

-
-

- Callback interfaces - MUST NOT inherit - from any non-callback interfaces, and non-callback interfaces MUST NOT - inherit from any callback interfaces. - Callback interfaces MUST NOT have any - consequential interfaces. -

-

- Static attributes and - static operations MUST NOT - be defined on a callback interface. -

-
Warning
-

- Specification authors SHOULD NOT define - callback interfaces - that have only a single operation, - unless required to describe the requirements of existing APIs. - Instead, a callback function SHOULD be used. -

-

- The definition of EventListener as a - callback interface - is an example of an existing API that needs to allow - user objects with a - given property (in this case “handleEvent”) to be considered to implement the interface. - For new APIs, and those for which there are no compatibility concerns, - using a callback function will allow - only a Function object (in the ECMAScript - language binding). -

-
- - - -
Note
-

- Specification authors wanting to define APIs that take ECMAScript objects - as “property bag” like function arguments are suggested to use - dictionary types rather than - callback interfaces. -

-

- For example, instead of this: -

-
IDL
callback interface Options {
-  attribute DOMString? option1;
-  attribute DOMString? option2;
-  attribute long? option3;
-};
-
-interface A {
-  void doTask(DOMString type, Options options);
-};
-

- to be used like this: -

-
ECMAScript
var a = getA();  // Get an instance of A.
-
-a.doTask("something", { option1: "banana", option3: 100 });
-

- instead write the following: -

-
IDL
dictionary Options {
-  DOMString? option1;
-  DOMString? option2;
-  long? option3;
-};
-
-interface A {
-  void doTask(DOMString type, Options options);
-};
-
- -

- The IDL for interfaces can be split into multiple parts by using - partial interface definitions - (matching "partial" PartialInterface). - The identifier of a partial - interface definition MUST be the same - as the identifier of an interface definition. All of - the members that appear on each of the partial interfaces are considered to be - members of the interface itself. -

-
interface SomeInterface {
-  interface-members…
-};
-
-partial interface SomeInterface {
-  interface-members…
-};
-
Note
-

Partial interface definitions are intended for use as a specification - editorial aid, allowing the definition of an interface to be separated - over more than one section of the document, and sometimes multiple documents.

-
-

- The order of appearance of an interface - definition and any of its partial interface - definitions does not matter. -

-
Note
-

A partial interface definition cannot specify that the interface - inherits from another interface. - Inheritance must be specified on the original interface - definition.

-
-

- Extended attributes can be specified on - partial interface definitions, with some - limitations. The following extended attributes MUST NOT - be specified on partial interface definitions: - [Constructor], - - [NamedConstructor], - [NoInterfaceObject]. -

-
Note
-

The above list of extended attributes - is all of those defined in this document that are applicable to - interfaces except for - [Exposed], - [Global], - [OverrideBuiltins], - [PrimaryGlobal] and - [Unforgeable].

-
-

- Any extended attribute specified - on a partial interface definition - is considered to appear on the interface - itself. -

-

- The relevant language binding determines how interfaces correspond to constructs - in the language. -

- - -

- The following extended attributes are applicable to interfaces: - [Constructor], - [Exposed], - [Global], - - [NamedConstructor], - [NoInterfaceObject], - [OverrideBuiltins], - [PrimaryGlobal], - [Unforgeable]. -

- -
[3]CallbackOrInterface"callback" CallbackRestOrInterface
 | - Interface
[4]CallbackRestOrInterfaceCallbackRest
 | - Interface
[5]Interface"interface" identifier Inheritance "{" InterfaceMembers "}" ";"
[6]Partial"partial" PartialDefinition
[7]PartialDefinitionPartialInterface
 | - PartialDictionary
[8]PartialInterface"interface" identifier "{" InterfaceMembers "}" ";"
[9]InterfaceMembersExtendedAttributeList InterfaceMember InterfaceMembers
 | - ε
[10]InterfaceMemberConst
 | - Operation
 | - Serializer
 | - Stringifier
 | - StaticMember
 | - Iterable
 | - ReadOnlyMember
 | - ReadWriteAttribute
[18]Inheritance":" identifier
 | - ε
- -
Example
- -

- The following IDL fragment - demonstrates the definition of two mutually referential interfaces. - Both Human and Dog - inherit from Animal. Objects that implement - either of those two interfaces will thus have a name attribute. -

-
IDL
interface Animal {
-  attribute DOMString name;
-};
-
-interface Human : Animal {
-  attribute Dog? pet;
-};
-
-interface Dog : Animal {
-  attribute Human? owner;
-};
-
- -
Example
-

- The following IDL fragment defines - simplified versions of a few DOM interfaces, one of which - is a callback interface. -

-
IDL
interface Node {
-  readonly attribute DOMString nodeName;
-  readonly attribute Node? parentNode;
-  Node appendChild(Node newChild);
-  void addEventListener(DOMString type, EventListener listener);
-};
-
-callback interface EventListener {
-  void handleEvent(Event event);
-};
-

- Since the EventListener interface is annotated - callback interface, user objects - can implement it: -

-
ECMAScript
var node = getNode();                                // Obtain an instance of Node.
-
-var listener = {
-  handleEvent: function(event) {
-    ...
-  }
-};
-node.addEventListener("click", listener);            // This works.
-
-node.addEventListener("click", function() { ... });  // As does this.
-

- It is not possible for a user object to implement Node, however: -

-
ECMAScript
var node = getNode();  // Obtain an instance of Node.
-
-var newNode = {
-  nodeName: "span",
-  parentNode: null,
-  appendChild: function(newchild) {
-    ...
-  },
-  addEventListener: function(type, listener) {
-    ...
-  }
-};
-node.appendChild(newNode);  // This will throw a TypeError exception.
-
- -
-

3.2.1 Constants

- -

- A constant is a declaration (matching - Const) used to bind a constant value to a name. - Constants can appear on interfaces. -

-
Warning
-

- Constants have in the past primarily been used to define - named integer codes in the style of an enumeration. The Web platform - is moving away from this design pattern in favor of the use of strings. - Specification authors who wish to define constants are strongly advised to discuss - this on the public-script-coord@w3.org - mailing list before proceeding. -

-
-
const type identifier = value;
-

- The identifier of a - constant - MUST NOT be the same as the identifier - of another interface member - defined on the same interface. - The identifier also MUST NOT - be “length”, “name” or “prototype”. -

-
Note
-

- These three names are the names of properties that exist on all - Function objects. -

-
-

- The type of a constant (matching ConstType) - MUST NOT be any type other than - a primitive type - or a nullable primitive type. - If an identifier is used, - it MUST reference a typedef - whose type is a primitive type or a nullable primitive type. -

-

- The ConstValue part of a - constant declaration gives the value of the constant, which can be - one of the two boolean literal tokens (true - and false), - the null token, an - integer token, - a float token, - or one of the three special floating point constant values - (-Infinity, Infinity and NaN). -

-
Note
-

- These values – in addition to strings and the empty sequence – can also be used to specify the - default value - of a dictionary member or of - an optional argument. Note that strings and the - empty sequence [] cannot be used as the value of a - constant. -

-
-

- The value of the boolean literal tokens true and - false are the IDL boolean values - true and false. -

-

- The value of an integer token is an integer - whose value is determined as follows: -

-
    -
  1. Let S be the sequence of characters matched by the integer token.
  2. -
  3. Let sign be −1 if S begins with U+002D HYPHEN-MINUS ("-"), and 1 otherwise.
  4. -
  5. Let base be the base of the number based on the characters that follow the optional leading U+002D HYPHEN-MINUS ("-") character: -
    -
    U+0030 DIGIT ZERO ("0"), U+0058 LATIN CAPITAL LETTER X ("X")
    -
    U+0030 DIGIT ZERO ("0"), U+0078 LATIN SMALL LETTER X ("x")
    -
    The base is 16.
    -
    U+0030 DIGIT ZERO ("0")
    -
    The base is 8.
    -
    Otherwise
    -
    The base is 10.
    -
    -
  6. -
  7. Let number be the result of interpreting all remaining characters following the optional leading U+002D HYPHEN-MINUS ("-") - character and any characters indicating the base as an integer specified in base base.
  8. -
  9. Return sign × number.
  10. -
-

- The type of an integer token is the same - as the type of the constant, dictionary member or optional argument it is being used as the value of. - The value of the integer token MUST NOT - lie outside the valid range of values for its type, as given in - section 3.10 . -

-

- The value of a float token is - either an IEEE 754 single-precision floating point number or an IEEE 754 - double-precision floating point number, depending on the type of the - constant, dictionary member or optional argument it is being used as the value for, determined as follows: -

-
    -
  1. Let S be the sequence of characters matched by the float token.
  2. -
  3. Let result be the Mathematical Value that would be obtained if S were - parsed as an ECMAScript NumericLiteral ( - [ECMA-262] - , section 11.8.3).
  4. -
  5. - If the float token is being - used as the value for a float or - unrestricted float, then - the value of the float token - is the IEEE 754 single-precision floating point number closest to - result. Otherwise, the float token is being - used as the value for a double or - unrestricted double, and - the value of the float token - is the IEEE 754 double-precision floating point number closest to - result. - [IEEE-754] -
  6. -
-

- The value of a constant value specified as - Infinity, -Infinity or NaN is either - an IEEE 754 single-precision floating point number or an IEEE 754 - double-precision floating point number, depending on the type of the - constant, dictionary member or optional argument it is being used as the - value for: -

-
-
Type unrestricted float, constant value Infinity
-
The value is the IEEE 754 single-precision positive infinity value.
-
Type unrestricted double, constant value Infinity
-
The value is the IEEE 754 double-precision positive infinity value.
-
Type unrestricted float, constant value -Infinity
-
The value is the IEEE 754 single-precision negative infinity value.
-
Type unrestricted double, constant value -Infinity
-
The value is the IEEE 754 double-precision negative infinity value.
-
Type unrestricted float, constant value NaN
-
The value is the IEEE 754 single-precision NaN value with the bit pattern 0x7fc00000.
-
Type unrestricted double, constant value NaN
-
The value is the IEEE 754 double-precision NaN value with the bit pattern 0x7ff8000000000000.
-
-

- The type of a float token is the same - as the type of the constant, dictionary member or optional argument it is being used as the value of. The value of the - float token MUST NOT - lie outside the valid range of values for its type, as given in - section 3.10 . - Also, Infinity, -Infinity and NaN MUST NOT - be used as the value of a float - or double. -

-

- The value of the null token is the special - null value that is a member of the - nullable types. The type of - the null token is the same as the - type of the constant, dictionary member or optional argument it is being used as the value of. -

-

- If VT is the type of the value assigned to a constant, and DT - is the type of the constant, dictionary member or optional argument itself, then these types MUST - be compatible, which is the case if DT and VT are identical, - or DT is a nullable type - whose inner type is VT. -

-

- Constants are not associated with - particular instances of the interface - on which they appear. It is language binding specific whether - constants are exposed on instances. -

-
Note
-

- - The ECMAScript language binding does however - allow constants to be accessed - through objects implementing the IDL interfaces - on which the constants are declared. - For example, with the following IDL: -

-
IDL
interface A {
-  const short rambaldi = 47;
-};
-

- the constant value can be accessed in ECMAScript either as - A.rambaldi or instanceOfA.rambaldi. -

-
-

- The following extended attributes are applicable to constants: - [Exposed]. -

- -
[26]Const"const" ConstType identifier "=" ConstValue ";"
[27]ConstValueBooleanLiteral
 | - FloatLiteral
 | - integer
 | - "null"
[28]BooleanLiteral"true"
 | - "false"
[29]FloatLiteralfloat
 | - "-Infinity"
 | - "Infinity"
 | - "NaN"
[80]ConstTypePrimitiveType Null
 | - identifier Null
-
Example
-

- The following IDL fragment - demonstrates how constants - of the above types can be defined. -

-
IDL
interface Util {
-  const boolean DEBUG = false;
-  const octet LF = 10;
-  const unsigned long BIT_MASK = 0x0000fc00;
-  const double AVOGADRO = 6.022e23;
-};
-
-
- -
-

3.2.2 Attributes

- -

- An attribute is an interface member - (matching "static" AttributeRest, - "stringifier" AttributeRest, - or Attribute) - that is used to declare data fields with a given type and - identifier whose value can - be retrieved and (in some cases) changed. There are two kinds of attributes: -

-
    -
  1. regular attributes, which are those - used to declare that objects implementing the interface - will have a data field member with the given identifier -
    attribute type identifier;
  2. -
  3. static attributes, which are used - to declare attributes that are not associated with a particular object implementing the interface -
    static attribute type identifier;
  4. -
-

- If an attribute has no static keyword, then it declares a - regular attribute. Otherwise, - it declares a static attribute. -

-

- The identifier of an - attribute - MUST NOT be the same as the identifier - of another interface member - defined on the same interface. - The identifier of a static attribute MUST NOT - be “prototype”. -

-

- The type of the attribute is given by the type (matching Type) - that appears after the attribute keyword. - If the Type is an - identifier or an identifier followed by ?, - then the identifier MUST - identify an interface, enumeration, - callback function or typedef. -

-

- The type of the attribute, after resolving typedefs, MUST NOT be a - nullable or non-nullable version of any of the following types: -

- -

- The attribute is read only if the - readonly keyword is used before the attribute keyword. - An object that implements the interface on which a read only attribute - is defined will not allow assignment to that attribute. It is language - binding specific whether assignment is simply disallowed by the language, - ignored or an exception is thrown. -

-
readonly attribute type identifier;
-

- A regular attribute - that is not read only - can be declared to inherit its getter - from an ancestor interface. This can be used to make a read only attribute - in an ancestor interface be writable on a derived interface. An attribute - inherits its getter if - its declaration includes inherit in the declaration. - The read only attribute from which the attribute inherits its getter - is the attribute with the same identifier on the closest ancestor interface - of the one on which the inheriting attribute is defined. The attribute - whose getter is being inherited MUST be - of the same type as the inheriting attribute, and inherit - MUST NOT appear on a read only - attribute or a static attribute. -

-
interface Ancestor {
-  readonly attribute TheType theIdentifier;
-};
-
-interface Derived : Ancestor {
-  inherit attribute TheType theIdentifier;
-};
- -

- When the stringifier keyword is used - in a regular attribute - declaration, it indicates that objects implementing the - interface will be stringified to the value of the attribute. See - section 3.2.4.2 - for details. -

-
stringifier attribute DOMString identifier;
-

- If an implementation attempts to get or set the value of an - attribute on a - user object - (for example, when a callback object has been supplied to the implementation), - and that attempt results in an exception being thrown, then, unless otherwise specified, that - exception will be propagated to the user code that caused the - implementation to access the attribute. Similarly, if a value - returned from getting the attribute cannot be converted to - an IDL type, then any exception resulting from this will also - be propagated to the user code that resulted in the implementation - attempting to get the value of the attribute. -

- -

- The following extended attributes - are applicable to regular and static attributes: - [Clamp], - [EnforceRange], - [Exposed], - [SameObject], - [TreatNullAs]. -

- -

- The following extended attributes - are applicable only to regular attributes: - [LenientThis], - [PutForwards], - [Replaceable], - [Unforgeable]. -

- -
[39]ReadOnlyMember"readonly" ReadOnlyMemberRest
[40]ReadOnlyMemberRestAttributeRest
[41]ReadWriteAttribute"inherit" ReadOnly AttributeRest
 | - AttributeRest
[42]AttributeRest"attribute" Type AttributeName ";"
[43]AttributeNameAttributeNameKeyword
 | - identifier
[44]AttributeNameKeyword"required"
[45]Inherit"inherit"
 | - ε
[46]ReadOnly"readonly"
 | - ε
- -
Example
-

- The following IDL fragment - demonstrates how attributes - can be declared on an interface: -

-
IDL
interface Animal {
-
-  // A simple attribute that can be set to any string value.
-  readonly attribute DOMString name;
-
-  // An attribute whose value can be assigned to.
-  attribute unsigned short age;
-};
-
-interface Person : Animal {
-
-  // An attribute whose getter behavior is inherited from Animal, and need not be
-  // specified in the description of Person.
-  inherit attribute DOMString name;
-};
-
-
- -
-

3.2.3 Operations

- -

- An operation is an interface member - (matching "static" OperationRest, - "stringifier" OperationRest, - "serializer" OperationRest, - ReturnType OperationRest or - SpecialOperation) - that defines a behavior that can be invoked on objects implementing the interface. - There are three kinds of operation: -

-
    -
  1. regular operations, which - are those used to declare that objects implementing the - interface will have a method with - the given identifier -
    return-type identifier(arguments…);
  2. -
  3. special operations, - which are used to declare special behavior on objects - implementing the interface, such as object indexing and stringification -
    special-keywords… return-type identifier(arguments…);
    -special-keywords… return-type (arguments…);
  4. -
  5. static operations, - which are used to declare operations that are not associated with - a particular object implementing the interface -
    static return-type identifier(arguments…);
  6. -
-

- If an operation has an identifier but no static - keyword, then it declares a regular operation. - If the operation has one or more - special keywords - used in its declaration (that is, any keyword matching - Special, or - the stringifier keyword), - then it declares a special operation. A single operation can declare - both a regular operation and a special operation; see - section 3.2.4 - for details on special operations. -

-

- If an operation has no identifier, - then it MUST - be declared to be a special operation using one of the - special keywords. -

-

- The identifier of a - regular operation - or static operation - MUST NOT be the same as the identifier - of a constant or - attribute - defined on the same interface. - The identifier of a static operation MUST NOT - be “prototype”. -

-
Note
-

- The identifier can be the same as that of another operation on the - interface, however. This is how operation overloading is specified. -

-
-

- The identifier of a static operation - also MUST NOT be the same as the identifier - of a regular operation - defined on the same interface. -

-

- The return type of the operation is given - by the type (matching ReturnType) - that appears before the operation’s optional identifier. - A return type of void indicates that the operation returns no value. - If the return type is an - identifier followed by ?, - then the identifier MUST - identify an interface, dictionary, enumeration, - callback function or typedef. -

-

- An operation’s arguments (matching ArgumentList) - are given between the parentheses in the declaration. Each individual argument is specified - as a type (matching Type) followed by an identifier - (matching ArgumentName). -

-
Note
-

For expressiveness, the identifier of an operation argument can also be specified - as one of the keywords matching the ArgumentNameKeyword - symbol without needing to escape it.

-
-

- If the Type of an operation argument is an identifier - followed by ?, - then the identifier MUST identify an interface, - enumeration, callback function - or typedef. - If the operation argument type is an identifier - not followed by ?, then the identifier MUST - identify any one of those definitions or a dictionary. -

-
return-type identifier(type identifier, type identifier, …);
-

- The identifier of each argument MUST NOT be the same - as the identifier of another argument in the same operation declaration. -

-

- Each argument can be preceded by a list of - extended attributes (matching - ExtendedAttributeList), - which can control how a value passed as the argument will be handled in - language bindings. -

-
return-type identifier([extended-attributes] type identifier, [extended-attributes] type identifier, …);
- -
Example
-

- The following IDL fragment - demonstrates how regular operations - can be declared on an interface: -

-
IDL
interface Dimensions {
-  attribute unsigned long width;
-  attribute unsigned long height;
-};
-
-interface Button {
-
-  // An operation that takes no arguments and returns a boolean.
-  boolean isMouseOver();
-
-  // Overloaded operations.
-  void setDimensions(Dimensions size);
-  void setDimensions(unsigned long width, unsigned long height);
-};
-
- -

- An operation is considered to be variadic - if the final argument uses the ... token just - after the argument type. Declaring an operation to be variadic indicates that - the operation can be invoked with any number of arguments after that final argument. - Those extra implied formal arguments are of the same type as the final explicit - argument in the operation declaration. The final argument can also be omitted - when invoking the operation. An argument MUST NOT - be declared with the ... token unless it - is the final argument in the operation’s argument list. -

-
return-type identifier(type... identifier);
-return-type identifier(type identifier, type... identifier);
-

- Extended attributes - that take an argument list - ([Constructor] and - [NamedConstructor], of those - defined in this specification) and callback functions - are also considered to be variadic - when the ... token is used in their argument lists. -

- -
Example
-

- The following IDL fragment defines an interface that has - two variadic operations: -

-
IDL
interface IntegerSet {
-  readonly attribute unsigned long cardinality;
-
-  void union(long... ints);
-  void intersection(long... ints);
-};
-

- In the ECMAScript binding, variadic operations are implemented by - functions that can accept the subsequent arguments: -

-
ECMAScript
var s = getIntegerSet();  // Obtain an instance of IntegerSet.
-
-s.union();                // Passing no arguments corresponding to 'ints'.
-s.union(1, 4, 7);         // Passing three arguments corresponding to 'ints'.
-

- A binding for a language that does not support variadic functions - might specify that an explicit array or list of integers be passed - to such an operation. -

-
- -

- An argument is considered to be an optional argument - if it is declared with the optional keyword. - The final argument of a variadic operation - is also considered to be an optional argument. Declaring an argument - to be optional indicates that the argument value can be omitted - when the operation is invoked. The final argument in an - operation MUST NOT explicitly be declared to be - optional if the operation is variadic. -

-
return-type identifier(type identifier, optional type identifier);
- -

- Optional arguments can also have a default value - specified. If the argument’s identifier is followed by a U+003D EQUALS SIGN ("=") - and a value (matching DefaultValue), - then that gives the optional argument its default value. - The implicitly optional final argument of a variadic - operation MUST NOT have a default value specified. - The default value is the value to be assumed when the operation is called with the - corresponding argument omitted. -

-
return-type identifier(type identifier, optional type identifier = value);
-
Warning
-

- It is strongly suggested not to use a default value - of true for boolean-typed arguments, - as this can be confusing for authors who might otherwise expect the default - conversion of undefined to be used (i.e., false). -

-
-

- If the type of an argument is a dictionary type - or a union type that has a - dictionary type as one of its flattened member types, - and that dictionary type and its ancestors have no required members, - and the argument is either the final argument or is followed only by - optional arguments, then - the argument MUST be specified as optional. - Such arguments are always considered to have a - default value of an empty dictionary, - unless otherwise specified. -

-
Note
-

- This is to encourage API designs that do not require authors to pass an - empty dictionary value when they wish only to use the dictionary’s - default values. -

-

- Dictionary types cannot have a default value specified explicitly, so the - “unless otherwise specified” clause above can only be invoked for - a union type that has a - dictionary type as one of its flattened member types. -

-
-

- When a boolean literal token (true or false), - the null token, - an integer token, a - float token or one of - the three special floating point literal values (Infinity, - -Infinity or NaN) is used as the - default value, - it is interpreted in the same way as for a constant. -

-

- Optional argument default values can also be specified using a string - token, whose value is a string type - determined as follows: -

-
    -
  1. Let S be the sequence of Unicode scalar values matched by the string token with its leading and trailing U+0022 QUOTATION MARK ('"') characters removed.
  2. -
  3. Depending on the type of the argument: -
    -
    DOMString
    -
    an enumeration type
    -
    The value of the string token is the sequence of 16 bit unsigned integer code units (hereafter referred to just as code units) corresponding to the UTF-16 encoding of S.
    -
    ByteString
    -
    The value of the string token is the sequence of 8 bit unsigned integer code units corresponding to the UTF-8 encoding of S.
    -
    USVString
    -
    The value of the string token is S.
    -
    -
  4. -
-

- If the type of the optional argument - is an enumeration, then its - default value if specified MUST - be one of the enumeration’s values. -

-

- Optional argument default values can also be specified using the - two token value [], which represents an empty sequence - value. The type of this value is the same as the type of the optional - argument it is being used as the default value of. That type - MUST be a - sequence type or a - nullable type. -

- -
Example
-

- The following IDL fragment - defines an interface - with a single operation - that can be invoked with two different argument list lengths: -

-
IDL
interface ColorCreator {
-  object createColor(double v1, double v2, double v3, optional double alpha);
-};
-

- It is equivalent to an interface - that has two overloaded - operations: -

-
IDL
interface ColorCreator {
-  object createColor(double v1, double v2, double v3);
-  object createColor(double v1, double v2, double v3, double alpha);
-};
-
- - -

- If an implementation attempts to invoke an - operation on a - user object (for example, when a callback object - has been supplied to the implementation), and that attempt results in an - exception being thrown, then, unless otherwise specified, that - exception will be propagated to the user code that caused the - implementation to invoke the operation. Similarly, if a value - returned from invoking the operation cannot be converted to - an IDL type, then any exception resulting from this will also - be propagated to the user code that resulted in the implementation - attempting to invoke the operation. -

- -

- The following extended attributes - are applicable to operations: - [Exposed], - [NewObject], - [TreatNullAs], - [Unforgeable]. -

-

- The following extended attributes are applicable to operation arguments: - [Clamp], - [EnforceRange], - [TreatNullAs]. -

- -
[17]DefaultValueConstValue
 | - string
 | - "[" "]"
[47]OperationReturnType OperationRest
 | - SpecialOperation
[48]SpecialOperationSpecial Specials ReturnType OperationRest
[49]SpecialsSpecial Specials
 | - ε
[50]Special"getter"
 | - "setter"
 | - "deleter"
 | - "legacycaller"
[51]OperationRestOptionalIdentifier "(" ArgumentList ")" ";"
[52]OptionalIdentifieridentifier
 | - ε
[53]ArgumentListArgument Arguments
 | - ε
[54]Arguments"," Argument Arguments
 | - ε
[55]ArgumentExtendedAttributeList OptionalOrRequiredArgument
[56]OptionalOrRequiredArgument"optional" Type ArgumentName Default
 | - Type Ellipsis ArgumentName
[57]ArgumentNameArgumentNameKeyword
 | - identifier
[58]Ellipsis"..."
 | - ε
[71]ArgumentNameKeyword - "attribute"
 | - "callback"
 | - "const"
 | - "deleter"
 | - "dictionary" -
 | - "enum"
 | - "getter"
 | - "implements"
 | - "inherit"
 | - "interface"
 | - "iterable" -
 | - "legacycaller"
 | - "partial"
 | - "required"
 | - "serializer"
 | - "setter"
 | - "static"
 | - "stringifier"
 | - "typedef" -
 | - "unrestricted" -
[89]ReturnTypeType
 | - "void"
-
- -
-

3.2.4 Special operations

- -

- A special operation is a - declaration of a certain kind of special behavior on objects implementing - the interface on which the special operation declarations appear. - Special operations are declared by using one or more - special keywords - in an operation declaration. -

-

- There are six kinds of special operations. The table below indicates - for a given kind of special operation what special keyword - is used to declare it and what the purpose of the special operation is: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Special operationKeywordPurpose
GettersgetterDefines behavior for when an object is indexed for property retrieval.
SetterssetterDefines behavior for when an object is indexed for property - assignment or creation.
DeletersdeleterDefines behavior for when an object is indexed for property deletion.
Legacy callerslegacycallerDefines behavior for when an object is called as if it were a function.
StringifiersstringifierDefines how an object is converted into a DOMString.
SerializersserializerDefines how an object is converted into a serialized form.
-

- Not all language bindings support all of the six kinds of special - object behavior. When special operations are declared using - operations with no identifier, then in language bindings that do - not support the particular kind of special operations there simply - will not be such functionality. -

-
Example
-

The following IDL fragment defines an interface with a getter and a setter:

-
IDL
interface Dictionary {
-  readonly attribute unsigned long propertyCount;
-
-  getter double (DOMString propertyName);
-  setter void (DOMString propertyName, double propertyValue);
-};
-

In language bindings that do not support property getters and setters, - objects implementing Dictionary will not - have that special behavior.

-
- -

- Defining a special operation with an identifier - is equivalent to separating the special operation out into its own - declaration without an identifier. This approach is allowed to - simplify prose descriptions of an interface’s operations. -

-
Example
-

The following two interfaces are equivalent:

-
IDL
interface Dictionary {
-  readonly attribute unsigned long propertyCount;
-
-  getter double getProperty(DOMString propertyName);
-  setter void setProperty(DOMString propertyName, double propertyValue);
-};
-
IDL
interface Dictionary {
-  readonly attribute unsigned long propertyCount;
-
-  double getProperty(DOMString propertyName);
-  void setProperty(DOMString propertyName, double propertyValue);
-
-  getter double (DOMString propertyName);
-  setter void (DOMString propertyName, double propertyValue);
-};
-
- -

- A given special keyword MUST NOT - appear twice on an operation. -

-

- Getters and setters come in two varieties: ones that - take a DOMString as a property name, - known as - named property getters and - named property setters, - and ones that take an unsigned long - as a property index, known as - indexed property getters and - indexed property setters. - There is only one variety of deleter: - named property deleters. - See section 3.2.4.4 - and section 3.2.4.5 - for details. -

-

- On a given interface, - there MUST exist at most one - stringifier, at most one serializer, at most one - named property deleter, - and at most one of each variety of getter and setter. - Multiple legacy callers can exist on an interface - to specify overloaded calling behavior. -

-

- If an interface has a setter of a given variety, - then it MUST also have a getter of that - variety. If it has a named property deleter, - then it MUST also have a - named property getter. -

-

- Special operations declared using operations MUST NOT - be variadic nor have any - optional arguments. -

-

- Special operations MUST NOT be declared on - callback interfaces. -

-

- If an object implements more than one interface - that defines a given special operation, then it is undefined which (if any) - special operation is invoked for that operation. -

- -
-
3.2.4.1 Legacy callers
- -

- When an interface has one or more - legacy callers, it indicates that objects that implement - the interface can be called as if they were functions. As mentioned above, - legacy callers can be specified using an operation - declared with the legacycaller keyword. -

-
legacycaller return-type identifier(arguments…);
-legacycaller return-type (arguments…);
-

- If multiple legacy callers are specified on an interface, overload resolution - is used to determine which legacy caller is invoked when the object is called - as if it were a function. -

-

- Legacy callers MUST NOT be defined to return a - promise type. -

-
Warning
-

- Legacy callers are universally recognised as an undesirable feature. They exist - only so that legacy Web platform features can be specified. Legacy callers - SHOULD NOT be used in specifications unless required to - specify the behavior of legacy APIs, and even then this should be discussed on - the public-script-coord@w3.org - mailing list before proceeding. -

-
- -
Example
-

- The following IDL fragment - defines an interface - with a legacy caller. -

-
IDL
interface NumberQuadrupler {
-  // This operation simply returns four times the given number x.
-  legacycaller double compute(double x);
-};
-

- An ECMAScript implementation supporting this interface would - allow a platform object - that implements NumberQuadrupler - to be called as a function: -

-
ECMAScript
var f = getNumberQuadrupler();  // Obtain an instance of NumberQuadrupler.
-
-f.compute(3);                   // This evaluates to 12.
-f(3);                           // This also evaluates to 12.
-
-
- -
-
3.2.4.2 Stringifiers
- -

- When an interface has a - stringifier, it indicates that objects that implement - the interface have a non-default conversion to a string. As mentioned above, - stringifiers can be specified using an operation - declared with the stringifier keyword. -

-
stringifier DOMString identifier();
-stringifier DOMString ();
-

- If an operation used to declare a stringifier does not have an - identifier, then prose - accompanying the interface MUST define - the stringification behavior - of the interface. If the operation does have an identifier, - then the object is converted to a string by invoking the - operation to obtain the string. -

-

- Stringifiers declared with operations MUST - be declared to take zero arguments and return a DOMString. -

-

- As a shorthand, if the stringifier keyword - is declared using an operation with no identifier, then the - operation’s return type and - argument list can be omitted. -

-
stringifier;
-
Example
-

The following two interfaces are equivalent:

-
IDL
interface A {
-  stringifier DOMString ();
-};
-
IDL
interface A {
-  stringifier;
-};
-
-

- The stringifier keyword - can also be placed on an attribute. - In this case, the string to convert the object to is the - value of the attribute. The stringifier keyword - MUST NOT be placed on an attribute unless - it is declared to be of type DOMString or USVString. - It also MUST NOT be placed on - a static attribute. -

-
stringifier attribute DOMString identifier;
- -
[35]Stringifier"stringifier" StringifierRest
[36]StringifierRestReadOnly AttributeRest
 | - ReturnType OperationRest
 | - ";"
- -
Example
-

- The following IDL fragment - defines an interface that will stringify to the value of its - name attribute: -

-
IDL
[Constructor]
-interface Student {
-  attribute unsigned long id;
-  stringifier attribute DOMString name;
-};
-

- In the ECMAScript binding, using a Student - object in a context where a string is expected will result in the - value of the object’s “name” property being - used: -

-
ECMAScript
var s = new Student();
-s.id = 12345678;
-s.name = '周杰倫';
-
-var greeting = 'Hello, ' + s + '!';  // Now greeting == 'Hello, 周杰倫!'.
-

- The following IDL fragment - defines an interface that has custom stringification behavior that is - not specified in the IDL itself. -

-
IDL
[Constructor]
-interface Student {
-  attribute unsigned long id;
-  attribute DOMString? familyName;
-  attribute DOMString givenName;
-
-  stringifier DOMString ();
-};
-

- Thus, prose is required to explain the stringification behavior, such - as the following paragraph: -

-
-

- Objects that implement the Student - interface must stringify as follows. If the value of the - familyName attribute is - null, the stringification of the - object is the value of the givenName - attribute. Otherwise, if the value of the - familyName attribute is not null, - the stringification of the object is the concatenation of the - value of the givenName attribute, - a single space character, and the value of - the familyName attribute. -

-
-

- An ECMAScript implementation of the IDL would behave as follows: -

-
ECMAScript
var s = new Student();
-s.id = 12345679;
-s.familyName = 'Smithee';
-s.givenName = 'Alan';
-
-var greeting = 'Hi ' + s;  // Now greeting == 'Hi Alan Smithee'.
-
-
- -
-
3.2.4.3 Serializers
- -

- When an interface has a - serializer, it indicates that objects provide - a way for them to be converted into a serialized form. Serializers can be declared - using the serializer keyword: -

-
serializer;
-

- Prose accompanying an interface that declares a serializer in this - way MUST define the - serialization behavior - of the interface. Serialization behavior is defined as returning - a serialized value of one of the following types: -

- -

- How the serialization behavior - is made available on an object in a language binding, and how exactly the abstract - serialized value is converted into - an appropriate concrete value, is language binding specific. -

-
Note
-

In the ECMAScript language binding, - serialization behavior - is exposed as a toJSON method which returns the - serialized value converted - into an ECMAScript value that can be serialized to JSON by the - JSON.stringify function. See section 4.5.8.2 - for details.

-
-

- Serialization behavior - can also be specified directly in IDL, rather than separately as prose. - This is done by following the serializer keyword with - a U+003D EQUALS SIGN ("=") character and - a serialization pattern, - which can take one of the following six forms: -

-
    -
  • -

    A map with entries corresponding to zero or more attributes from the interface, and optionally - attributes from an inherited interface:

    -
    serializer = { attribute-identifier, attribute-identifier, … };
    -serializer = { inherit, attribute-identifier, attribute-identifier, … };
    -

    Each identifier MUST be the identifier of an attribute declared - on the interface. The identified attributes all MUST have a - serializable type.

    -

    The inherit keyword MUST NOT be used unless - the interface inherits from another that defines a serializer, and the closest such interface - defines its serializer using this serialization pattern - form or the following form (i.e. { attribute }).

    -

    The serialization behavior for this - form of serialization pattern is as follows:

    -
      -
    1. Let map be an empty map.
    2. -
    3. If the inherit keyword was used, then set map to be the result of - the serialization behavior of the - closest inherited interface that declares a serializer.
    4. -
    5. For each attribute identifier i in the serialization pattern, in order: -
        -
      1. Remove any entry in map with key name i.
      2. -
      3. Let V be the value of the attribute with identifier i.
      4. -
      5. Add an entry to map whose key name is i and whose - value is the result of converting - V to a serialized value.
      6. -
      -
    6. -
    7. Return map.
    8. -
    -
  • -
  • -

    A map with entries corresponding to all attributes from the interface that have - a serializable type, and optionally - attributes from an inherited interface:

    -
    serializer = { attribute };
    -serializer = { inherit, attribute };
    -

    The inherit keyword MUST NOT be used unless - the interface inherits from another that defines a serializer, and the closest such interface - defines its serializer using this serialization pattern - form or the previous form.

    -

    The serialization behavior for this - form of serialization pattern is as follows:

    -
      -
    1. Let map be an empty map.
    2. -
    3. If the inherit keyword was used, then set map to be the result of - the serialization behavior of the - closest inherited interface that declares a serializer.
    4. -
    5. For each identifier i of an attribute on the interface whose type is - a serializable type, in the order they appear - on the interface: -
        -
      1. Remove any entry in map with key name i.
      2. -
      3. Let V be the value of the attribute with identifier i.
      4. -
      5. Add an entry to map whose key name is i and whose - value is the result of converting - V to a serialized value.
      6. -
      -
    6. -
    7. Return map.
    8. -
    -
  • -
  • -

    A map with entries corresponding to the named properties:

    -
    serializer = { getter };
    -

    This form MUST NOT be used unless the interface or one it - inherits from supports named properties and the return type of the named property getter - is a serializable type.

    -

    The serialization behavior for this - form of serialization pattern is as follows:

    -
      -
    1. Let map be an empty map.
    2. -
    3. For each supported property name n on the object, in order: -
        -
      1. Let V be the value of the named property with name n.
      2. -
      3. Add an entry to map whose key name is n and whose - value is the result of converting - V to a serialized value.
      4. -
      -
    4. -
    5. Return map.
    6. -
    -
  • -
  • -

    A list of the values of zero or more attributes on the interface:

    -
    serializer = [ attribute-identifier, attribute-identifier, … ];
    -

    Each identifier MUST be the identifier of an attribute declared - on the interface. The identified attributes all MUST have a - serializable type.

    -

    The serialization behavior for this - form of serialization pattern is as follows:

    -
      -
    1. Let list be an empty list.
    2. -
    3. For each attribute identifier i in the serialization pattern: -
        -
      1. Let V be the value of the attribute with identifier i.
      2. -
      3. Append to list the value that is the result of - converting - V to a serialized value.
      4. -
      -
    4. -
    5. Return list.
    6. -
    -
  • -
  • -

    A list with entries corresponding to the indexed properties:

    -
    serializer = [ getter ];
    -

    This form MUST NOT be used unless the interface or one it - inherits from supports indexed properties and the return type of the indexed property getter - is a serializable type.

    -

    The serialization behavior for this - form of serialization pattern is as follows:

    -
      -
    1. Let list be an empty list.
    2. -
    3. Let i be 0.
    4. -
    5. While i is less than or equal to the greatest supported property index on the object: -
        -
      1. Let V be the value of the indexed property with index i - if i is a supported property index, or null otherwise.
      2. -
      3. Append to list the value that is the result of - converting - V to a serialized value.
      4. -
      5. Set i to i + 1.
      6. -
      -
    6. -
    7. Return list.
    8. -
    -
  • -
  • -

    A single attribute:

    -
    serializer = attribute-identifier;
    -

    The identifier MUST be the identifier of an attribute declared - on the interface, and this attribute MUST have a - serializable type.

    -

    The serialization behavior for this - form of serialization pattern is as follows:

    -
      -
    1. Let V be the value of the attribute with the specified identifier.
    2. -
    3. Return the result of converting - V to a serialized value.
    4. -
    -
  • -
- -
Note
-

- Entries are added to maps in a particular order so that in the ECMAScript language binding - it is defined what order properties are added to objects. This is because this order - can influence the serialization that JSON.stringify can produce. -

-
- -

The list of serializable types and how they are - converted to serialized values is as follows:

-
-
long long
-
converted by choosing the closest equivalent double value - (as when converting a long long to an ECMAScript Number value)
-
unsigned long long
-
converted by choosing the closest equivalent double value - (as when converting an unsigned long long to an ECMAScript Number value)
-
any other integer type
-
float
-
converted by choosing the equivalent double value
-
double
-
boolean
-
DOMString
-
the same value of the respective type
-
an enumeration type
-
the equivalent DOMString value
-
a USVString
-
the DOMString produced by - encoding the given sequence of Unicode scalar values in - UTF-16
-
a ByteString
-
the equivalent DOMString value where each code unit has the same value as the corresponding byte value
-
a nullable serializable type
-
converted to null if that is its value, - otherwise converted as per its inner type
-
a union type where - all of its member types - are serializable types
-
converted as per its specific type
-
a sequence type that - has a serializable type as its element type
-
converted to a list where each element is the result of converting its - corresponding sequence element to a serialized value
-
a dictionary where - all of its members have - serializable types
-
converted to a map consisting of an entry for each dictionary member - that is present, where the entry’s key is the identifier of the dictionary - member and its value is the result of converting the dictionary member’s - value to a serialized value
-
an interface type that has a - serializer
-
converted by invoking the object’s serializer
-
- -

- Serializers can also be specified using an operation - with the serializer keyword: -

-
serializer type identifier();
-

- Serializers declared with operations MUST - be declared to take zero arguments and return a serializable type. -

-

- The serialization behavior - of the interface with a serializer declared with an operation is the result of - converting - the value returned from invoking the operation to a serialized value. -

- -
[30]Serializer"serializer" SerializerRest
[31]SerializerRestOperationRest
 | - "=" SerializationPattern ";"
 | - ";"
[32]SerializationPattern"{" SerializationPatternMap "}"
 | - "[" SerializationPatternList "]"
 | - identifier
[33]SerializationPatternMap"getter"
 | - "inherit" Identifiers
 | - identifier Identifiers
 | - ε
[34]SerializationPatternList"getter"
 | - identifier Identifiers
 | - ε
[91]Identifiers"," identifier Identifiers
 | - ε
- -
Example
-

- The following IDL fragment defines - an interface Transaction that has a - serializer defined in prose: -

-
IDL
interface Transaction {
-  readonly attribute Account from;
-  readonly attribute Account to;
-  readonly attribute double amount;
-  readonly attribute DOMString description;
-  readonly attribute unsigned long number;
-
-  serializer;
-};
-
-interface Account {
-  DOMString name;
-  unsigned long number;
-};
-

- The serializer could be defined as follows: -

-
-

- The serialization behavior - of the Transaction interface is to run the following - algorithm, where O is the object that implements Transaction: -

-
    -
  1. Let map be an empty map.
  2. -
  3. Add an entry to map whose key is “from” and whose value is - the serialized value of - the number attribute on the Account - object referenced by the from attribute on O.
  4. -
  5. Add an entry to map whose key is “to” and whose value is - the serialized value of - the number attribute on the Account - object referenced by the to attribute on O.
  6. -
  7. For both of the attributes amount and description, - add an entry to map whose key is the - identifier of the attribute - and whose value is the serialized value - of the value of the attribute on O.
  8. -
  9. Return map.
  10. -
-
-

- If it was acceptable for Account objects to be serializable - on their own, then serialization patterns - could be used to avoid having to define the serialization behavior - in prose: -

-
IDL
interface Transaction {
-  readonly attribute Account from;
-  readonly attribute Account to;
-  readonly attribute double amount;
-  readonly attribute DOMString description;
-  readonly attribute unsigned long number;
-
-  serializer = { from, to, amount, description };
-};
-
-interface Account {
-  DOMString name;
-  unsigned long number;
-
-  serializer = number;
-};
-

- In the ECMAScript language binding, there would exist a toJSON method on - Transaction objects: -

-
ECMAScript
// Get an instance of Transaction.
-var txn = getTransaction();
-
-// Evaluates to an object like this:
-// {
-//   from: 1234
-//   to: 5678
-//   amount: 110.75
-//   description: "dinner"
-// }
-txn.toJSON();
-
-// Evaluates to a string like this:
-// '{"from":1234,"to":5678,"amount":110.75,"description":"dinner"}'
-JSON.stringify(txn);
-
-
- -
-
3.2.4.4 Indexed properties
- -

- An interface that defines - an indexed property getter - is said to support indexed properties. -

-

- If an interface supports indexed properties, - then the interface definition MUST be accompanied by - a description of what indices the object can be indexed with at - any given time. These indices are called the supported property indices. -

-

- Indexed property getters MUST - be declared to take a single unsigned long argument. - Indexed property setters MUST - be declared to take two arguments, where the first is an unsigned long. -

-
getter type identifier(unsigned long identifier);
-setter type identifier(unsigned long identifier, type identifier);
-
-getter type (unsigned long identifier);
-setter type (unsigned long identifier, type identifier);
-

- The following requirements apply to the definitions of indexed property getters and setters: -

-
    -
  • - If an indexed property getter was specified using an operation - with an identifier, - then the value returned when indexing the object with a given supported property index - is the value that would be returned by invoking the operation, passing - the index as its only argument. If the operation used to declare the indexed property getter - did not have an identifier, then the interface definition must be accompanied - by a description of how to determine the value of an indexed property - for a given index. -
  • -
  • - If an indexed property setter was specified using an operation - with an identifier, - then the behavior that occurs when indexing the object for property assignment with a given supported property index and value - is the same as if the operation is invoked, passing - the index as the first argument and the value as the second argument. If the operation used to declare the indexed property setter - did not have an identifier, then the interface definition must be accompanied - by a description of how to set the value of an existing indexed property - and how to set the value of a new indexed property - for a given property index and value. -
  • -
- -
Note
-

- Note that if an indexed property getter or - setter - is specified using an operation with an identifier, - then indexing an object with an integer that is not a supported property index - does not necessarily elicit the same behavior as invoking the operation with that index. The actual behavior in this - case is language binding specific. -

-

- In the ECMAScript language binding, a regular property lookup is done. For example, take the following IDL: -

-
IDL
interface A {
-  getter DOMString toWord(unsigned long index);
-};
-

- Assume that an object implementing A has supported property indices - in the range 0 ≤ index < 2. Also assume that toWord is defined to return - its argument converted into an English word. The behavior when invoking the - operation with an out of range index - is different from indexing the object directly: -

-
ECMAScript
var a = getA();
-
-a.toWord(0);  // Evaluates to "zero".
-a[0];         // Also evaluates to "zero".
-
-a.toWord(5);  // Evaluates to "five".
-a[5];         // Evaluates to undefined, since there is no property "5".
-
- -
Example
-

- The following IDL fragment defines an interface - OrderedMap which allows - retrieving and setting values by name or by index number: -

-
IDL
interface OrderedMap {
-  readonly attribute unsigned long size;
-
-  getter any getByIndex(unsigned long index);
-  setter void setByIndex(unsigned long index, any value);
-
-  getter any get(DOMString name);
-  setter void set(DOMString name, any value);
-};
-

- Since all of the special operations are declared using - operations with identifiers, the only additional prose - that is necessary is that which describes what keys those sets - have. Assuming that the get() operation is - defined to return null if an - attempt is made to look up a non-existing entry in the - OrderedMap, then the following - two sentences would suffice: -

-
-

- An object map implementing OrderedMap - supports indexed properties with indices in the range - 0 ≤ index < map.size. -

-

- Such objects also support a named property for every name that, - if passed to get(), would return a non-null value. -

-
-

- As described in section 4.7 , - an ECMAScript implementation would create - properties on a platform object implementing - OrderedMap that correspond to - entries in both the named and indexed property sets. - These properties can then be used to interact - with the object in the same way as invoking the object’s - methods, as demonstrated below: -

-
ECMAScript
// Assume map is a platform object implementing the OrderedMap interface.
-var map = getOrderedMap();
-var x, y;
-
-x = map[0];       // If map.size > 0, then this is equivalent to:
-                  //
-                  //   x = map.getByIndex(0)
-                  //
-                  // since a property named "0" will have been placed on map.
-                  // Otherwise, x will be set to undefined, since there will be
-                  // no property named "0" on map.
-
-map[1] = false;   // This will do the equivalent of:
-                  //
-                  //   map.setByIndex(1, false)
-
-y = map.apple;    // If there exists a named property named "apple", then this
-                  // will be equivalent to:
-                  //
-                  //   y = map.get('apple')
-                  //
-                  // since a property named "apple" will have been placed on
-                  // map.  Otherwise, y will be set to undefined, since there
-                  // will be no property named "apple" on map.
-
-map.berry = 123;  // This will do the equivalent of:
-                  //
-                  //   map.set('berry', 123)
-
-delete map.cake;  // If a named property named "cake" exists, then the "cake"
-                  // property will be deleted, and then the equivalent to the
-                  // following will be performed:
-                  //
-                  //   map.remove("cake")
-
-
- -
-
3.2.4.5 Named properties
- -

- An interface that defines - a named property getter - is said to support named properties. -

-

- If an interface supports named properties, - then the interface definition MUST be accompanied by - a description of the ordered set of names that can be used to index the object - at any given time. These names are called the - supported property names. -

-

- Named property getters and deleters MUST - be declared to take a single DOMString argument. - Named property setters MUST - be declared to take two arguments, where the first is a DOMString. -

-
getter type identifier(DOMString identifier);
-setter type identifier(DOMString identifier, type identifier);
-deleter type identifier(DOMString identifier);
-
-getter type (DOMString identifier);
-setter type (DOMString identifier, type identifier);
-deleter type (DOMString identifier);
-

- The following requirements apply to the definitions of named property getters, setters and deleters: -

-
    -
  • - If a named property getter was specified using an operation - with an identifier, - then the value returned when indexing the object with a given supported property name - is the value that would be returned by invoking the operation, passing - the name as its only argument. If the operation used to declare the named property getter - did not have an identifier, then the interface definition must be accompanied - by a description of how to determine the value of a named property - for a given property name. -
  • -
  • - If a named property setter was specified using an operation - with an identifier, - then the behavior that occurs when indexing the object for property assignment with a given supported property name and value - is the same as if the operation is invoked, passing - the name as the first argument and the value as the second argument. If the operation used to declare the named property setter - did not have an identifier, then the interface definition must be accompanied - by a description of how to set the value of an existing named property - and how to set the value of a new named property - for a given property name and value. -
  • -
  • - If a named property deleter was specified using an operation - with an identifier, - then the behavior that occurs when indexing the object for property deletion with a given supported property name - is the same as if the operation is invoked, passing - the name as the only argument. If the operation used to declare the named property deleter - did not have an identifier, then the interface definition must be accompanied - by a description of how to delete an existing named property - for a given property name. -
  • -
- -
Note
-

- As with indexed properties, - if a named property getter, - setter or - deleter - is specified using an operation with an identifier, - then indexing an object with a name that is not a supported property name - does not necessarily elicit the same behavior as invoking the operation with that name; the behavior - is language binding specific. -

-
-
-
- -
-

3.2.5 Static attributes and operations

- -

- Static attributes and - static operations are ones that - are not associated with a particular instance of the - interface - on which they are declared, and are instead associated with the interface - itself. Static attributes and operations are declared by using the - static keyword in their declarations. -

-

- It is language binding specific whether it is possible to invoke - a static operation or get or set a static attribute through a reference - to an instance of the interface. -

-

- Static attributes and operations MUST NOT be - declared on callback interfaces. -

- -
[37]StaticMember"static" StaticMemberRest
[38]StaticMemberRestReadOnly AttributeRest
 | - ReturnType OperationRest
- -
Example
-

- The following IDL fragment defines an interface - Circle that has a static - operation declared on it: -

-
IDL
interface Point { /* ... */ };
-
-interface Circle {
-  attribute double cx;
-  attribute double cy;
-  attribute double radius;
-
-  static readonly attribute long triangulationCount;
-  static Point triangulate(Circle c1, Circle c2, Circle c3);
-};
-

- In the ECMAScript language binding, the Function object for - triangulate and the accessor property for triangulationCount - will exist on the interface object - for Circle: -

-
ECMAScript
var circles = getCircles();           // an Array of Circle objects
-
-typeof Circle.triangulate;            // Evaluates to "function"
-typeof Circle.triangulationCount;     // Evaluates to "number"
-Circle.prototype.triangulate;         // Evaluates to undefined
-Circle.prototype.triangulationCount;  // Also evaluates to undefined
-circles[0].triangulate;               // As does this
-circles[0].triangulationCount;        // And this
-
-// Call the static operation
-var triangulationPoint = Circle.triangulate(circles[0], circles[1], circles[2]);
-
-// Find out how many triangulations we have done
-window.alert(Circle.triangulationCount);
- -
-
- -
-

3.2.6 Overloading

- -

- If a regular operation - or static operation - defined on an interface - has an identifier - that is the same as the identifier of another operation on that - interface of the same kind (regular or static), then the operation is said to be - overloaded. When the identifier - of an overloaded operation is used to invoke one of the - operations on an object that implements the interface, the - number and types of the arguments passed to the operation - determine which of the overloaded operations is actually - invoked. If an interface has multiple - legacy callers defined on it, - then those legacy callers are also said to be overloaded. - In the ECMAScript language binding, constructors - can be overloaded too. There are some restrictions on the arguments - that overloaded operations, legacy callers and constructors can be - specified to take, and in order to describe these restrictions, - the notion of an effective overload set is used. -

-

- Operations and legacy callers - MUST NOT be overloaded across interface - and partial interface definitions. -

-
Note
-

- For example, the overloads for both f and g - are disallowed: -

-
IDL
interface A {
-  void f();
-};
-
-partial interface A {
-  void f(double x);
-  void g();
-};
-
-partial interface A {
-  void g(DOMString x);
-};
-

Note that the [Constructor] and - [NamedConstructor] - extended attributes are disallowed from appearing - on partial interface definitions, - so there is no need to also disallow overloading for constructors.

-
-

- An effective overload set - represents the allowable invocations for a particular - operation, - constructor (specified with [Constructor] - or [NamedConstructor]), - legacy caller or - callback function. - The algorithm to compute an effective overload set - operates on one of the following six types of IDL constructs, and listed with them below are - the inputs to the algorithm needed to compute the set. -

-
-
For regular operations
-
For static operations
-
- -
-
For legacy callers
-
- -
-
For constructors
-
- -
-
For named constructors
-
- -
-
For callback functions
-
- -
-
-

- An effective overload set is used, among other things, to determine whether there are ambiguities in the - overloaded operations, constructors and callers specified on an interface. -

-

- The elements of an effective overload set are tuples of the form - <callable, type list, optionality list>. If the effective overload - set is for regular operations, static operations or legacy callers, then callable is an operation; - if it is for constructors or named constructors, then callable is an - extended attribute; and if it is for callback functions, then callable - is the callback function itself. In all cases, type list is a list - of IDL types, and optionality list is a list of three possible optionality values – - “required”, “optional” or “variadic” – indicating whether - the argument at a given index was declared as being optional - or corresponds to a variadic argument. - Each tuple represents an allowable invocation of the operation, - constructor, legacy caller or callback function with an argument value list of the given types. - Due to the use of optional arguments - and variadic operations - and constructors, there may be multiple entries in an effective overload set identifying - the same operation or constructor. -

-

- The algorithm below describes how to compute an effective overload set. - The following input variables are used, if they are required: -

-
    -
  • the identifier of the operation or named constructor is A
  • -
  • the argument count is N
  • -
  • the interface is I
  • -
  • the callback function is C
  • -
-

- Whenever an argument of an extended - attribute is mentioned, it is referring to an argument of the - extended attribute’s named argument list. -

-
    -
  1. Initialize S to ∅.
  2. -
  3. Let F be a set with elements as follows, according to the kind of effective overload set: -
    -
    For regular operations
    -
    - The elements of F are the regular operations with - identifier A defined on interface I. -
    -
    For static operations
    -
    - The elements of F are the static operations with - identifier A defined on interface I. -
    -
    For constructors
    -
    - The elements of F are the - [Constructor] - extended attributes on interface I. -
    -
    For named constructors
    -
    - The elements of F are the - [NamedConstructor] - extended attributes on interface I whose - named argument lists’ - identifiers are A. -
    -
    For legacy callers
    -
    - The elements of F are the legacy callers - defined on interface I. -
    -
    For callback functions
    -
    - The single element of F is the callback function itself, C. -
    -
    -
  4. - -
  5. - Let maxarg be the maximum number of arguments the operations, constructor extended attributes or callback functions in F are declared to take. - For variadic operations and constructor extended attributes, - the argument on which the ellipsis appears counts as a single argument. -
    Note
    -

    So void f(long x, long... y); is considered to be declared to take two arguments.

    -
    -
  6. -
  7. Let m be the maximum of maxarg and N.
  8. -
  9. For each operation, extended attribute or callback function X in F: -
      -
    1. Let n be the number of arguments X is declared to take.
    2. -
    3. Let t0..n−1 be a list of types, where ti - is the type of X’s argument at index i.
    4. -
    5. Let o0..n−1 be a list of optionality values, where oi - is “variadic” if X’s argument at index i is a final, variadic argument, - “optional” if the argument is optional, - and “required” otherwise.
    6. -
    7. Add to S the tuple <X, t0..n−1, o0..n−1>.
    8. -
    9. If X is declared to be variadic, then: -
        -
      1. Add to S the tuple <X, t0..n−2, o0..n−2>. -
        Note
        -

        This leaves off the final, variadic argument.

        -
        -
      2. -
      3. For every integer i, such that n ≤ i ≤ m−1: -
          -
        1. Let u0..i be a list of types, where uj = tj (for j < n) and uj = tn−1 (for j ≥ n).
        2. -
        3. Let p0..i be a list of optionality values, where pj = oj (for j < n) and pj = “variadic” (for j ≥ n).
        4. -
        5. Add to S the tuple <X, u0..i, p0..i>.
        6. -
        -
      4. -
      -
    10. -
    11. Initialize i to n−1.
    12. -
    13. While i ≥ 0: -
        -
      1. If argument i of X is not optional, then break this loop.
      2. -
      3. Otherwise, add to S the tuple <X, t0..i−1, o0..i−1>.
      4. -
      5. Set i to i−1.
      6. -
      -
    14. -
    15. If n > 0 and all arguments of X are optional, then add to S the tuple <X, (), ()> (where “()” represents the empty list).
    16. -
    -
  10. -
  11. - The effective overload set is S. -
  12. -
-
Example
-

- For the following interface: -

-
IDL
interface A {
-  /* f1 */ void f(DOMString a);
-  /* f2 */ void f(Node a, DOMString b, double... c);
-  /* f3 */ void f();
-  /* f4 */ void f(Event a, DOMString b, optional DOMString c, double... d);
-};
-

- assuming Node and Event - are two other interfaces of which no object can implement both, - the effective overload set - for regular operations with - identifier f and argument count 4 is: -

-
- { <f1, (DOMString), (required)>,
- <f2, (Node, DOMString), (required, required)>,
- <f2, (Node, DOMString, double), (required, required, variadic)>,
- <f2, (Node, DOMString, double, double), (required, required, variadic, variadic)>,
- <f3, (), ()>,
- <f4, (Event, DOMString), (required, required)>,
- <f4, (Event, DOMString, DOMString), (required, required, optional)>,
- <f4, (Event, DOMString, DOMString, double), (required, required, optional, variadic)> } -
-
- -

- Two types are distinguishable if - at most one of the two includes a nullable type - or is a dictionary type, - and at least one of the following three conditions is true: -

-
    -
  1. -

    - The two types (taking their inner types - if they are nullable types) appear - in the following table and there is a “●” mark in the corresponding entry - or there is a letter in the corresponding entry and the designated additional - requirement below the table is satisfied:

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    booleannumeric typesstring typesinterfaceobjectcallback
    function
    dictionarysequence<T>exception typesbuffer source types
    boolean
    numeric types
    string types
    interface(a)(b)(b)
    object
    callback function
    dictionary
    sequence<T>
    exception types
    buffer source types
    -
      -
    1. The two identified interfaces are - not the same, it is not possible for a single platform object - to implement both interfaces, - and it is not the case that both are callback interfaces.
    2. -
    3. The interface type is not a callback interface.
    4. -
    -
  2. -
  3. - One type is a union type or nullable union type, - the other is neither a union type nor a nullable union type, and each - member type of the first is distinguishable - with the second. -
  4. -
  5. - Both types are either a union type or nullable union type, and each member type of the one - is distinguishable with each member type of the other. -
  6. -
-
Note
-

Promise types do not appear in the above table, and as a consequence - are not distinguishable with any other type.

-
-

- If there is more than one entry in an effective overload set - that has a given type list length, then for those entries there - MUST be an index i such - that for each pair of entries the types at index i are - distinguishable. - The lowest such index is termed the distinguishing argument index - for the entries of the effective overload set with the given type list length. -

-
Example
-

- Consider the effective overload set shown in the previous example. - There are multiple entries in the set with type list lengths 2, 3 and 4. - For each of these type list lengths, the distinguishing - argument index is 0, since Node and - Event are distinguishable. -

-

- The following use of overloading however is invalid: -

-
IDL
interface B {
-  void f(DOMString x);
-  void f(double x);
-};
-

- since DOMString and - double are not distinguishable. -

-
-

- In addition, for each index j, where j is less than the - distinguishing argument index - for a given type list length, the types at index j in - all of the entries’ type lists MUST be the same - and the optionality values at index j in all of the entries’ optionality lists MUST - be the same. -

-
Example
-

The following is invalid:

-
IDL
interface B {
-  /* f1 */ void f(DOMString w);
-  /* f2 */ void f(long w, double x, Node y, Node z);
-  /* f3 */ void f(double w, double x, DOMString y, Node z);
-};
-

- For argument count 4, the effective overload set is: -

-
- { <f1, (DOMString), (required)>,
- <f2, (long, double, Node, Node), (required, required, required, required)>,
- <f3, (double, double, DOMString, Node), (required, required, required, required)> } -
-

- Looking at entries with type list length 4, the - distinguishing argument index - is 2, since Node and - DOMString are distinguishable. - However, since the arguments in these two overloads at index 0 are different, - the overloading is invalid. -

-
- -
- -
-

3.2.7 Iterable declarations

- -

- An interface can be declared to be - iterable by using an iterable declaration - (matching Iterable) in the body of the interface. -

-
iterable<value-type>;
-iterable<key-type, value-type>;
-

- Objects implementing an interface that is declared to be iterable - support being iterated over to obtain a sequence of values. -

-
Note
-

In the ECMAScript language binding, an interface that is iterable - will have “entries”, “keys”, “values” and @@iterator - properties on its interface prototype object.

-
-

- If a single type parameter is given, then the interface has a - value iterator and provides - values of the specified type. - If two type parameters are given, then the interface has a - pair iterator and provides - value pairs, where the first value is a key and the second is the - value associated with the key. -

-

- Prose accompanying an interface with a value iterator - MUST define what the - list of values to iterate over is, - unless the interface also - supports indexed properties, - in which case the values of the indexed properties are implicitly - iterated over. Prose accompanying an interface with a - pair iterator - MUST define what the list of - value pairs to iterate over - is. -

-
Note
-

Interfaces that support indexed properties - need to have a “length” attribute for the iterator to work correctly.

-
-

- The prose is responsible for defining that the list of values - or value pairs to iterate over is snapshotted at the time - iteration begins, if that is desired. To handle lists that - can change during iteration, the behavior of an - iterator is defined to loop through the items in order, starting - at index 0, and advancing this index on each iteration. Iteration ends when - the index has gone past the end of the list. -

-
Note
-

This is how array iterator objects work. - For interfaces that support indexed properties, - the iterator objects returned by “entries”, “keys”, “values” and @@iterator are - actual array iterator objects.

-
-

- Interfaces with iterable declarations MUST NOT - have any interface members - named “entries”, “keys” or “values”, - or have any inherited - or consequential - interfaces that have interface members with these names. -

- -
Example
-

Consider the following interface SessionManager, which allows access to - a number of Session objects:

-
IDL
interface SessionManager {
-  Session getSessionForUser(DOMString username);
-  readonly attribute unsigned long sessionCount;
-
-  iterable<Session>;
-};
-
-interface Session {
-  readonly attribute DOMString username;
-  // ...
-};
-

- The behavior of the iterator could be defined like so: -

-
-

- The values to iterate over - are a snapshot of the open Session objects - on the SessionManager sorted by username. -

-
-

- In the ECMAScript language binding, the interface prototype object - for the SessionManager interface - has a values method that is a function, which, when invoked, - returns an iterator object that itself has a next method that returns the - next value to be iterated over. It has keys and entries - methods that iterate over the indexes of the list of session objects - and [index, session object] pairs, respectively. It also has - a @@iterator method that allows a SessionManager - to be used in a for..of loop: -

-
ECMAScript
// Get an instance of SessionManager.
-// Assume that it has sessions for two users, "anna" and "brian".
-var sm = getSessionManager();
-
-typeof SessionManager.prototype.values;            // Evaluates to "function"
-var it = sm.values();                              // values() returns an iterator object
-String(it);                                        // Evaluates to "[object SessionManager Iterator]"
-typeof it.next;                                    // Evaluates to "function"
-
-// This loop will alert "anna" and then "brian".
-for (;;) {
-  let result = it.next();
-  if (result.done) {
-    break;
-  }
-  let session = result.value;
-  window.alert(session.username);
-}
-
-// This loop will also alert "anna" and then "brian".
-for (let session of sm) {
-  window.alert(session.username);
-}
-

- If the SessionManager interface supported indexed properties - and had an attribute named “length” - that reflected the number of session objects, we could avoid defining the - values to iterate over. -

-
- -

- An interface MUST NOT have more than one - iterable declaration. - The inherited - and consequential - interfaces of an interface with an - iterable declaration - MUST NOT also have an - iterable declaration. -

-

- The following extended attributes are applicable to iterable declarations: - [Exposed]. -

- - -
[59]Iterable"iterable" "<" Type OptionalType ">" ";"
[60]OptionalType"," Type
 | - ε
-
- - - - -
- -
-

3.3 Dictionaries

- -

- A dictionary is a definition (matching - Dictionary) - used to define an associative array data type with a fixed, ordered set of key–value pairs, - termed dictionary members, - where keys are strings and values are of a particular type specified in the definition. -

-
dictionary identifier {
-  dictionary-members…
-};
-

- Dictionaries are always passed by value. In language bindings where a dictionary is represented by an object of some kind, passing a - dictionary to a platform object will not result in a reference to the dictionary being kept by that object. - Similarly, any dictionary returned from a platform object will be a copy and modifications made to it will not be visible to the platform object. -

-

- A dictionary can be defined to inherit from another dictionary. - If the identifier of the dictionary is followed by a colon and an identifier, - then that identifier identifies the inherited dictionary. The identifier - MUST identify a dictionary. -

-

- A dictionary MUST NOT be declared such that - its inheritance hierarchy has a cycle. That is, a dictionary - A cannot inherit from itself, nor can it inherit from another - dictionary B that inherits from A, and so on. -

-
dictionary Base {
-  dictionary-members…
-};
-
-dictionary Derived : Base {
-  dictionary-members…
-};
-

- The inherited dictionaries of - a given dictionary D is the set of all dictionaries that D - inherits from, directly or indirectly. If D does not inherit - from another dictionary, then the set is empty. Otherwise, the set - includes the dictionary E that D inherits - from and all of E’s inherited dictionaries. -

-

- A dictionary value of type D can have key–value pairs corresponding - to the dictionary members defined on D and on any of D’s - inherited dictionaries. - On a given dictionary value, the presence of each dictionary member - is optional, unless that member is specified as required. - When specified in the dictionary value, a dictionary member is said to be - present, otherwise it is not present. - Dictionary members can also optionally have a default value, which is - the value to use for the dictionary member when passing a value to a - platform object that does - not have a specified value. Dictionary members with default values are - always considered to be present. -

-
Warning
-

- As with operation argument default values, - it is strongly suggested not to use true as the - default value for - boolean-typed - dictionary members, - as this can be confusing for authors who might otherwise expect the default - conversion of undefined to be used (i.e., false). -

-
-

- Each dictionary member (matching - DictionaryMember) is specified - as a type (matching Type) followed by an - identifier - (given by an identifier token following - the type). The identifier is the key name of the key–value pair. - If the Type - is an identifier - followed by ?, then the identifier - MUST identify an - interface, enumeration, - callback function or typedef. - If the dictionary member type is an identifier - not followed by ?, then the identifier MUST - identify any one of those definitions or a dictionary. -

-
dictionary identifier {
-  type identifier;
-};
-

- If the identifier is followed by a U+003D EQUALS SIGN ("=") - and a value (matching DefaultValue), - then that gives the dictionary member its default value. -

-
dictionary identifier {
-  type identifier = value;
-};
-

- When a boolean literal token (true or false), - the null token, - an integer token, a - float token, - one of the three special floating point literal values (Infinity, - -Infinity or NaN), - a string token or - the two token sequence [] is used as the - default value, - it is interpreted in the same way as for an operation’s - optional argument default value. -

-

- If the type of the dictionary member - is an enumeration, then its - default value if specified MUST - be one of the enumeration’s values. -

-

- If the type of the dictionary member is preceded by the - required keyword, the member is considered a - required dictionary member - and must be present on the dictionary. A - required dictionary - member MUST NOT have a default value. -

-
dictionary identifier {
-  required type identifier;
-};
-

- The type of a dictionary member MUST NOT include - the dictionary it appears on. A type includes a dictionary D - if at least one of the following is true: -

-
    -
  • the type is D
  • -
  • the type is a dictionary that inherits from D
  • -
  • the type is a nullable type - whose inner type includes D
  • -
  • the type is a sequence type - whose element type includes D
  • -
  • the type is a union type, - one of whose member types - includes D
  • -
  • the type is a dictionary, one of whose members or inherited members has - a type that includes D
  • -
-

- As with interfaces, the IDL for dictionaries can be split into multiple parts - by using partial dictionary definitions - (matching "partial" Dictionary). - The identifier of a partial - dictionary definition MUST be the same as the - identifier of a dictionary definition. All of the members that appear on each - of the partial dictionary definitions are considered to be members of - the dictionary itself. -

-
dictionary SomeDictionary {
-  dictionary-members…
-};
-
-partial dictionary SomeDictionary {
-  dictionary-members…
-};
-
Note
-

As with partial interface definitions, partial dictionary definitions are intended for use as a specification - editorial aid, allowing the definition of a dictionary to be separated - over more than one section of the document, and sometimes multiple documents.

-
-

- The order of the dictionary members - on a given dictionary is such that inherited dictionary members are ordered - before non-inherited members, and the dictionary members on the one - dictionary definition (including any partial dictionary definitions) are - ordered lexicographically by the Unicode codepoints that comprise their - identifiers. -

-
Note
-

For example, with the following definitions:

-
IDL
dictionary B : A {
-  long b;
-  long a;
-};
-
-dictionary A {
-  long c;
-  long g;
-};
-
-dictionary C : B {
-  long e;
-  long f;
-};
-
-partial dictionary A {
-  long h;
-  long d;
-};
-

- the order of the dictionary members - of a dictionary value of type C is - c, d, g, h, a, b, e, f. -

-

- Dictionaries are required to have their members ordered because - in some language bindings the behavior observed when passing - a dictionary value to a platform object depends on the order - the dictionary members are fetched. For example, consider the - following additional interface: -

-
IDL
interface Something {
-  void f(A a);
-};
-

- and this ECMAScript code: -

-
ECMAScript
var something = getSomething();  // Get an instance of Something.
-var x = 0;
-
-var dict = { };
-Object.defineProperty(dict, "d", { get: function() { return ++x; } });
-Object.defineProperty(dict, "c", { get: function() { return ++x; } });
-
-something.f(dict);
-

- The order that the dictionary members are fetched in determines - what values they will be taken to have. Since the order for - A is defined to be c then d, - the value for c will be 1 and the value for d will be 2. -

-
-

- The identifier of a dictionary member MUST NOT be - the same as that of another dictionary member defined on the dictionary or - on that dictionary’s inherited dictionaries. -

-

- Dictionaries MUST NOT be used as the type of an - attribute or - constant. -

-

- The following extended attributes are applicable to dictionaries: - [Constructor], - [Exposed]. -

-

- The following extended attributes are applicable to dictionary members: - [Clamp], - [EnforceRange]. -

-
[6]Partial"partial" PartialDefinition
[7]PartialDefinitionPartialInterface
 | - PartialDictionary
[11]Dictionary"dictionary" identifier Inheritance "{" DictionaryMembers "}" ";"
[12]DictionaryMembersExtendedAttributeList DictionaryMember DictionaryMembers
 | - ε
[13]DictionaryMemberRequired Type identifier Default ";"
[15]PartialDictionary"dictionary" identifier "{" DictionaryMembers "}" ";"
[16]Default"=" DefaultValue
 | - ε
[17]DefaultValueConstValue
 | - string
 | - "[" "]"
[18]Inheritance":" identifier
 | - ε
-
Example
-

- One use of dictionary types is to allow a number of optional arguments to - an operation without being - constrained as to the order they are specified at the call site. For example, - consider the following IDL fragment: -

-
IDL
[Constructor]
-interface Point {
-  attribute double x;
-  attribute double y;
-};
-
-dictionary PaintOptions {
-  DOMString? fillPattern = "black";
-  DOMString? strokePattern = null;
-  Point position;
-};
-
-interface GraphicsContext {
-  void drawRectangle(double width, double height, optional PaintOptions options);
-};
-

- In an ECMAScript implementation of the IDL, an Object - can be passed in for the optional PaintOptions dictionary: -

-
ECMAScript
// Get an instance of GraphicsContext.
-var ctx = getGraphicsContext();
-
-// Draw a rectangle.
-ctx.drawRectangle(300, 200, { fillPattern: "red", position: new Point(10, 10) });
-

- Both fillPattern and strokePattern are given default values, - so if they are omitted, the definition of drawRectangle can assume that they - have the given default values and not include explicit wording to handle - their non-presence. -

-
-
- -
-

3.4 Exceptions

- -

- An exception is a type of object that - represents an error and which can be thrown or treated as a first - class value by implementations. Web IDL does not allow exceptions - to be defined, but instead has a number of pre-defined exceptions - that specifications can reference and throw in their definition of - operations, attributes, and so on. Exceptions have an - error name, - a DOMString, - which is the type of error the exception represents, and a - message, which is an optional, - user agent-defined value that provides human readable details of the error. -

-

- There are two kinds of exceptions available to be thrown from specifications. - The first is a simple exception, which - is identified by one of the following names: -

-
    -
  • Error
  • -
  • EvalError
  • -
  • RangeError
  • -
  • ReferenceError
  • -
  • TypeError
  • -
  • URIError
  • -
-

- These correspond to all of the ECMAScript error objects ( - [ECMA-262] - , section 19.5) (apart from - SyntaxError, which is deliberately omitted as - it is for use only by the ECMAScript parser). - The meaning of - each simple exception matches - its corresponding Error object in the - ECMAScript specification. -

-

- The second kind of exception is a DOMException, - which is an exception that encapsulates a name and an optional integer code, - for compatibility with historically defined exceptions in the DOM. -

-

- For simple exceptions, - the error name is the name - of the exception. - For a DOMException, - the error name MUST - be one of the names listed in the error names table - below. The table also indicates the DOMException's integer code - for that error name, if it has one. -

-

- There are two types that can be used to refer to - exception objects: Error, which encompasses all exceptions, - and DOMException which includes just DOMException objects. - This allows for example an operation - to be declared to have a DOMException - return type or an attribute - to be of type Error. -

-

- An exception can be created by providing its - error name. - Exceptions can also be thrown, by providing the - same details required to create one. -

-

- The resulting behavior from creating and throwing an exception is language binding-specific. -

-
Note
-

- See section 4.12 - for details on what creating and throwing an exception - entails in the ECMAScript language binding. -

-
-
Example
-

- Here are some examples of wording to use to create and throw exceptions. - To throw a new simple exception named - TypeError: -

-
-

Throw a TypeError.

-
-

- To throw a new DOMException with - error name - IndexSizeError: -

-
-

Throw an IndexSizeError.

-
-

- To create a new DOMException with - error name - SyntaxError: -

-
-

Let object be a newly created SyntaxError.

-
-
- -
-

3.4.1 Error names

- -

- The error names table below lists all the allowed error names - for DOMExceptions, a description, - and legacy code values. -

- -
Note
-

If an error name is not listed here, please file a bug as indicated at the top of this specification and it will be addressed shortly. Thanks!

-
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
NameDescriptionLegacy code name and value
"IndexSizeError"The index is not in the allowed range.INDEX_SIZE_ERR (1)
"HierarchyRequestError"The operation would yield an incorrect node tree.HIERARCHY_REQUEST_ERR (3)
"WrongDocumentError"The object is in the wrong document.WRONG_DOCUMENT_ERR (4)
"InvalidCharacterError"The string contains invalid characters.INVALID_CHARACTER_ERR (5)
"NoModificationAllowedError"The object can not be modified.NO_MODIFICATION_ALLOWED_ERR (7)
"NotFoundError"The object can not be found here.NOT_FOUND_ERR (8)
"NotSupportedError"The operation is not supported.NOT_SUPPORTED_ERR (9)
"InUseAttributeError"The attribute is in use.INUSE_ATTRIBUTE_ERR (10)
"InvalidStateError"The object is in an invalid state.INVALID_STATE_ERR (11)
"SyntaxError"The string did not match the expected pattern.SYNTAX_ERR (12)
"InvalidModificationError"The object can not be modified in this way.INVALID_MODIFICATION_ERR (13)
"NamespaceError"The operation is not allowed by Namespaces in XML. [XMLNS]NAMESPACE_ERR (14)
"InvalidAccessError"The object does not support the operation or argument.INVALID_ACCESS_ERR (15)
"SecurityError"The operation is insecure.SECURITY_ERR (18)
"NetworkError"A network error occurred.NETWORK_ERR (19)
"AbortError"The operation was aborted.ABORT_ERR (20)
"URLMismatchError"The given URL does not match another URL.URL_MISMATCH_ERR (21)
"QuotaExceededError"The quota has been exceeded.QUOTA_EXCEEDED_ERR (22)
"TimeoutError"The operation timed out.TIMEOUT_ERR (23)
"InvalidNodeTypeError"The supplied node is incorrect or has an incorrect ancestor for this operation.INVALID_NODE_TYPE_ERR (24)
"DataCloneError"The object can not be cloned.DATA_CLONE_ERR (25)
"EncodingError"The encoding operation (either encoding or decoding) failed.
"NotReadableError"The I/O read operation failed.
"UnknownError"The operation failed for an unknown transient reason (e.g. out of memory).
"ConstraintError"A mutation operation in a transaction failed because a constraint was not satisfied.
"DataError"Provided data is inadequate.
"TransactionInactiveError"A request was placed against a transaction which is currently not active, or which is finished.
"ReadOnlyError"The mutating operation was attempted in a "readonly" transaction.
"VersionError"An attempt was made to open a database using a lower version than the existing version.
"OperationError"The operation failed for an operation-specific reason.
-
-
- -
-

3.5 Enumerations

- -

- An enumeration is a definition (matching - Enum) used to declare a type - whose valid values are a set of predefined strings. Enumerations - can be used to restrict the possible - DOMString values that can be assigned to an - attribute or passed to an - operation. -

-
enum identifier { enumeration-values… };
-

- The enumeration values are specified - as a comma-separated list of string literals. - The list of enumeration values - MUST NOT include duplicates. -

-
Warning
-

- It is strongly suggested that enumeration values be all lowercase, - and that multiple words be separated using dashes or not be - separated at all, unless there is a specific reason to use another - value naming scheme. For example, an enumeration value that - indicates an object should be created could be named - "createobject" or "create-object". - Consider related uses of enumeration values when deciding whether - to dash-separate or not separate enumeration value words so that - similar APIs are consistent. -

-
-

- The behavior when a string value that is not a valid enumeration value - is used when assigning to an attribute, - or passed as an operation argument, - whose type is the enumeration, is language binding specific. -

-
Note
-

- In the ECMAScript binding, assignment of an invalid string value to an - attribute is ignored, while - passing such a value as an operation argument - results in an exception being thrown. -

-
-

- No extended attributes - defined in this specification are applicable to enumerations. -

- -
[19]Enum"enum" identifier "{" EnumValueList "}" ";"
[20]EnumValueListstring EnumValueListComma
[21]EnumValueListComma"," EnumValueListString
 | - ε
[22]EnumValueListStringstring EnumValueListComma
 | - ε
- -
Example
-

- The following IDL fragment - defines an enumeration - that is used as the type of an attribute - and an operation argument: -

-
IDL
enum MealType { "rice", "noodles", "other" };
-
-interface Meal {
-  attribute MealType type;
-  attribute double size;     // in grams
-
-  void initialize(MealType type, double size);
-};
-

- An ECMAScript implementation would restrict the strings that can be - assigned to the type property or passed to the initialize function - to those identified in the enumeration. -

-
ECMAScript
var meal = getMeal();                // Get an instance of Meal.
-
-meal.initialize("rice", 200);        // Operation invoked as normal.
-
-try {
-  meal.initialize("sandwich", 100);  // Throws a TypeError.
-} catch (e) {
-}
-
-meal.type = "noodles";               // Attribute assigned as normal.
-meal.type = "dumplings";             // Attribute assignment ignored.
-meal.type == "noodles";              // Evaluates to true.
-
-
- -
-

3.6 Callback functions

- - -

- A callback function is a definition (matching - "callback" CallbackRest) used to declare a function type. -

-
callback identifier = return-type (arguments…);
-
Note
-

See also the similarly named callback interfaces.

-
-

- The identifier on the - left of the equals sign gives the name of the callback function - and the return type and argument list (matching ReturnType - and ArgumentList) on the right side of the equals - sign gives the signature of the callback function type. -

-

- Callback functions MUST NOT - be used as the type of a constant. -

-

- The following extended attribute is applicable to callback functions: - [TreatNonObjectAsNull]. -

- -
[3]CallbackOrInterface"callback" CallbackRestOrInterface
 | - Interface
[4]CallbackRestOrInterfaceCallbackRest
 | - Interface
[23]CallbackRestidentifier "=" ReturnType "(" ArgumentList ")" ";"
- -
Example
-

- The following IDL fragment defines - a callback function used for an API that - invokes a user-defined function when an operation is complete. -

-
IDL
callback AsyncOperationCallback = void (DOMString status);
-
-interface AsyncOperations {
-  void performOperation(AsyncOperationCallback whenFinished);
-};
-

- In the ECMAScript language binding, a Function object is - passed as the operation argument. -

-
ECMAScript
var ops = getAsyncOperations();  // Get an instance of AsyncOperations.
-
-ops.performOperation(function(status) {
-  window.alert("Operation finished, status is " + status + ".");
-});
-
-
- -
-

3.7 Typedefs

- -

- A typedef is a definition (matching - Typedef) - used to declare a new name for a type. This new name is not exposed - by language bindings; it is purely used as a shorthand for referencing - the type in the IDL. -

-
typedef type identifier;
-

- The type being given a new name is specified after the typedef - keyword (matching Type), and the - identifier token following the - type gives the name. -

-

- The Type MUST NOT - identify the same or another typedef. -

-

- No extended attributes - defined in this specification are applicable to typedefs. -

- -
[24]Typedef"typedef" Type identifier ";"
- -
Example
-

- The following IDL fragment - demonstrates the use of typedefs - to allow the use of a short - identifier instead of a long - sequence type. -

-
IDL
interface Point {
-  attribute double x;
-  attribute double y;
-};
-
-typedef sequence<Point> Points;
-
-interface Widget {
-  boolean pointWithinBounds(Point p);
-  boolean allPointsWithinBounds(Points ps);
-};
-
-
- -
-

3.8 Implements statements

- -

- An implements statement is a definition - (matching ImplementsStatement) - used to declare that all objects implementing an interface A - (identified by the first identifier) - MUST additionally implement interface B - (identified by the second identifier), including all other interfaces that - B inherits from. -

-
identifier-A implements identifier-B;
-

- Transitively, if objects implementing B - are declared with an implements statement - to additionally implement interface C, then all objects implementing - A do additionally implement interface C. -

-

- The two identifiers MUST - identify two different interfaces. -

-

- The interface identified on the left-hand side of an implements statement - MUST NOT inherit - from the interface identified on the right-hand side, and vice versa. Both identified - interfaces also MUST NOT be - callback interfaces. -

-

- If each implements statement is - considered to be an edge in a directed graph, from a node representing the interface - on the left-hand side of the statement to a node representing the interface on the - right-hand side, then this graph MUST NOT have any cycles. -

-

- Interfaces that a given object implements are partitioned into those that are considered - supplemental interfaces and those that are not. - An interface A is considered to be a - supplemental interface of an object - O if: -

-
    -
  • O implements a different interface B, and the IDL states that - B implements A; or
  • -
  • O implements a different supplemental interface - C, and C inherits from A.
  • -
-
Note
-

- Specification authors are discouraged from writing implements statements - where the interface on the left-hand side - is a supplemental interface. - For example, if author 1 writes: -

-
IDL
interface Window { ... };
-interface SomeFunctionality { ... };
-Window implements SomeFunctionality;
-

- and author 2 later writes: -

-
IDL
interface Gizmo { ... };
-interface MoreFunctionality { ... };
-SomeFunctionality implements MoreFunctionality;
-Gizmo implements SomeFunctionality;
-

- then it might be the case that author 2 is unaware of exactly which - interfaces already are used on the left-hand side of an - implements SomeFunctionality statement, and so has - required more objects implement MoreFunctionality - than he or she expected. -

-

- Better in this case would be for author 2 to write: -

-
IDL
interface Gizmo { ... };
-interface MoreFunctionality { ... };
-Gizmo implements SomeFunctionality;
-Gizmo implements MoreFunctionality;
-
-

- The consequential interfaces of an interface - A are: -

-
    -
  • each interface B where the IDL states A implements B;
  • -
  • each interface that a consequential interface of A inherits from; and
  • -
  • each interface D where the IDL states that C implements D, - where C is a consequential interface of A.
  • -
-

- For a given interface, there MUST NOT - be any member defined on any of its consequential interfaces - whose identifier is the same as any other member defined on any - of those consequential interfaces or on the original interface itself. -

-
Note
-

For example, that precludes the following:

-
IDL
interface A { attribute long x; };
-interface B { attribute long x; };
-A implements B;  // B::x would clash with A::x
-
-interface C { attribute long y; };
-interface D { attribute long y; };
-interface E : D { };
-C implements E;  // D::y would clash with C::y
-
-interface F { };
-interface H { attribute long z; };
-interface I { attribute long z; };
-F implements H;
-F implements I;  // H::z and I::z would clash when mixed in to F
-
-

- No extended attributes - defined in this specification are applicable to - implements statements. -

- -
[25]ImplementsStatementidentifier "implements" identifier ";"
- -
Example
-

- The following IDL fragment - defines two interfaces, stating - that one interface is always implemented on objects implementing the other. -

-
IDL
interface Entry {
-  readonly attribute unsigned short entryType;
-  // ...
-};
-
-interface Observable {
-  void addEventListener(DOMString type,
-                        EventListener listener,
-                        boolean useCapture);
-  // ...
-};
-
-Entry implements Observable;
-

- An ECMAScript implementation would thus have an “addEventListener” - property in the prototype chain of every Entry: -

-
ECMAScript
var e = getEntry();          // Obtain an instance of Entry.
-typeof e.addEventListener;  // Evaluates to "function".
- -

- Note that it is not the case that all Observable - objects implement Entry. -

-
-
- -
-

3.9 Objects implementing interfaces

- -

- In a given implementation of a set of IDL fragments, - an object can be described as being a platform object, a - user object, or neither. There are two kinds of - object that are considered to be platform objects: -

- -

- In a browser, for example, - the browser-implemented DOM objects (implementing interfaces such as Node and - Document) that provide access to a web page’s contents - to ECMAScript running in the page would be platform objects. These objects might be exotic objects, - implemented in a language like C++, or they might be native ECMAScript objects. Regardless, - an implementation of a given set of IDL fragments needs to be able to recognize all platform objects - that are created by the implementation. This might be done by having some internal state that records whether - a given object is indeed a platform object for that implementation, or perhaps by observing - that the object is implemented by a given internal C++ class. How exactly platform objects - are recognised by a given implementation of a set of IDL fragments is implementation specific. -

-

- All other objects in the system would not be treated as platform objects. For example, assume that - a web page opened in a browser loads an ECMAScript library that implements DOM Core. This library - would be considered to be a different implementation from the browser provided implementation. - The objects created by the ECMAScript library that implement the Node interface - will not be treated as platform objects that implement Node by the browser implementation. -

-

- User objects are those that authors would create, implementing - callback interfaces that the Web APIs use to be able to invoke author-defined - operations or to send and receive values to the author’s program through - manipulating the object’s attributes. In a web page, an ECMAScript object - that implements the EventListener interface, which is - used to register a callback that the DOM Events implementation invokes, would be considered - to be a user object. -

-

- Note that user objects can only implement callback interfaces - and platform objects can only implement non-callback interfaces. -

- -
- - - -
-

3.10 Types

- -

- This section lists the types supported by Web IDL, the set of values - corresponding to each type, and how constants - of that type are represented. -

-

- The following types are known as integer types: - byte, - octet, - short, - unsigned short, - long, - unsigned long, - long long and - unsigned long long. -

-

- The following types are known as numeric types: - the integer types, - float, - unrestricted float, - double and - unrestricted double. -

-

- The primitive types are - boolean and the numeric types. -

-

- The string types are - DOMString, all enumeration types, - ByteString and USVString. -

-

- The exception types are - Error and DOMException. -

-

- The typed array types are - Int8Array, - Int16Array, - Int32Array, - Uint8Array, - Uint16Array, - Uint32Array, - Uint8ClampedArray, - Float32Array and - Float64Array. -

-

- The buffer source types - are ArrayBuffer, - DataView, - and the typed array types. -

-

- The object type, - all interface types - and the exception types - are known as object types. -

-

- Every type has a type name, which - is a string, not necessarily unique, that identifies the type. - Each sub-section below defines what the type name is for each - type. -

-

- When conversions are made from language binding specific types to - IDL types in order to invoke an operation - or assign a value to an attribute, - all conversions necessary will be performed before the - specified functionality of the operation or attribute assignment - is carried out. If the conversion cannot - be performed, then the operation will not run or - the attribute will not be updated. In some language bindings, - type conversions could result in an exception being thrown. - In such cases, these exceptions will be propagated to the - code that made the attempt to invoke the operation or - assign to the attribute. -

- -
[73]TypeSingleType
 | - UnionType Null
[74]SingleTypeNonAnyType
 | - "any"
[75]UnionType"(" UnionMemberType "or" UnionMemberType UnionMemberTypes ")"
[76]UnionMemberTypeNonAnyType
 | - UnionType Null
[77]UnionMemberTypes"or" UnionMemberType UnionMemberTypes
 | - ε
[78]NonAnyTypePrimitiveType Null
 | - PromiseType Null
 | - "ByteString" Null
 | - "DOMString" Null
 | - "USVString" Null
 | - identifier Null
 | - "sequence" "<" Type ">" Null
 | - "object" Null
 | - "Error" Null
 | - "DOMException" Null
 | - BufferRelatedType Null
[80]ConstTypePrimitiveType Null
 | - identifier Null
[81]PrimitiveTypeUnsignedIntegerType
 | - UnrestrictedFloatType
 | - "boolean"
 | - "byte"
 | - "octet"
[82]UnrestrictedFloatType"unrestricted" FloatType
 | - FloatType
[83]FloatType"float"
 | - "double"
[84]UnsignedIntegerType"unsigned" IntegerType
 | - IntegerType
[85]IntegerType"short"
 | - "long" OptionalLong
[86]OptionalLong"long"
 | - ε
[87]PromiseType"Promise" "<" ReturnType ">"
[88]Null"?"
 | - ε
- -
-

3.10.1 any

- -

- The any type is the union of all other possible - non-union types. - Its type name is “Any”. -

-

- The any type is like - a discriminated union type, in that each of its values has a - specific non-any type - associated with it. For example, one value of the - any type is the - unsigned long - 150, while another is the long 150. - These are distinct values. -

-

- The particular type of an any - value is known as its specific type. - (Values of union types also have - specific types.) -

-
- -
-

3.10.2 boolean

- -

- The boolean type has two values: - true and false. -

-

- boolean constant values in IDL are - represented with the true and - false tokens. -

-

- The type name of the - boolean type is “Boolean”. -

-
- -
-

3.10.3 byte

- -

- The byte type is a signed integer - type that has values in the range [−128, 127]. -

-

- byte constant values in IDL are - represented with integer - tokens. -

-

- The type name of the - byte type is “Byte”. -

-
- -
-

3.10.4 octet

- -

- The octet type is an unsigned integer - type that has values in the range [0, 255]. -

-

- octet constant values in IDL are - represented with integer - tokens. -

-

- The type name of the - octet type is “Octet”. -

-
- -
-

3.10.5 short

- -

- The short type is a signed integer - type that has values in the range [−32768, 32767]. -

-

- short constant values in IDL are - represented with integer - tokens. -

-

- The type name of the - short type is “Short”. -

-
- -
-

3.10.6 unsigned short

- -

- The unsigned short type is an unsigned integer - type that has values in the range [0, 65535]. -

-

- unsigned short constant values in IDL are - represented with integer - tokens. -

-

- The type name of the - unsigned short type is “UnsignedShort”. -

-
- -
-

3.10.7 long

- -

- The long type is a signed integer - type that has values in the range [−2147483648, 2147483647]. -

-

- long constant values in IDL are - represented with integer - tokens. -

-

- The type name of the - long type is “Long”. -

-
- -
-

3.10.8 unsigned long

- -

- The unsigned long type is an unsigned integer - type that has values in the range [0, 4294967295]. -

-

- unsigned long constant values in IDL are - represented with integer - tokens. -

-

- The type name of the - unsigned long type is “UnsignedLong”. -

-
- -
-

3.10.9 long long

- -

- The long long type is a signed integer - type that has values in the range [−9223372036854775808, 9223372036854775807]. -

-

- long long constant values in IDL are - represented with integer - tokens. -

-

- The type name of the - long long type is “LongLong”. -

-
- -
-

3.10.10 unsigned long long

- -

- The unsigned long long type is an unsigned integer - type that has values in the range [0, 18446744073709551615]. -

-

- unsigned long long constant values in IDL are - represented with integer - tokens. -

-

- The type name of the - unsigned long long type is “UnsignedLongLong”. -

-
- -
-

3.10.11 float

- -

- The float type is a floating point numeric - type that corresponds to the set of finite single-precision 32 bit - IEEE 754 floating point numbers. [IEEE-754] -

-

- float constant values in IDL are - represented with float - tokens. -

-

- The type name of the - float type is “Float”. -

-
Warning
-

- Unless there are specific reasons to use a 32 bit floating point type, - specifications SHOULD use - double rather than float, - since the set of values that a double can - represent more closely matches an ECMAScript Number. -

-
-
- -
-

3.10.12 unrestricted float

- -

- The unrestricted float type is a floating point numeric - type that corresponds to the set of all possible single-precision 32 bit - IEEE 754 floating point numbers, finite and non-finite. [IEEE-754] -

-

- unrestricted float constant values in IDL are - represented with float - tokens. -

-

- The type name of the - unrestricted float type is “UnrestrictedFloat”. -

-
- -
-

3.10.13 double

- -

- The double type is a floating point numeric - type that corresponds to the set of finite double-precision 64 bit - IEEE 754 floating point numbers. [IEEE-754] -

-

- double constant values in IDL are - represented with float - tokens. -

-

- The type name of the - double type is “Double”. -

-
- -
-

3.10.14 unrestricted double

- -

- The unrestricted double type is a floating point numeric - type that corresponds to the set of all possible double-precision 64 bit - IEEE 754 floating point numbers, finite and non-finite. [IEEE-754] -

-

- unrestricted double constant values in IDL are - represented with float - tokens. -

-

- The type name of the - unrestricted double type is “UnrestrictedDouble”. -

-
- -
-

3.10.15 DOMString

- -

- The DOMString type - corresponds to the set of all possible sequences of code units. - Such sequences are commonly interpreted as UTF-16 encoded strings [RFC2781] - although this is not required. - While DOMString is defined to be an OMG IDL boxed - sequence<unsigned short> - valuetype in DOM Level 3 Core - ([DOM3CORE], section 1.2.1), - this document defines DOMString to be an intrinsic type so as to avoid - special casing that sequence type in various situations where a - string is required. -

-
Note
-

- Note also that null - is not a value of type DOMString. - To allow null, a - nullable DOMString, - written as DOMString? in IDL, needs to be used. -

-
-

- Nothing in this specification requires a DOMString - value to be a valid UTF-16 string. For example, a DOMString - value might include unmatched surrogate pair characters. However, authors - of specifications using Web IDL might want to obtain a sequence of - Unicode scalar values given a particular sequence of - code units. - The following algorithm defines a way to - convert a DOMString to a sequence of Unicode scalar values: -

-
    -
  1. Let S be the DOMString value.
  2. -
  3. Let n be the length of S.
  4. -
  5. Initialize i to 0.
  6. -
  7. Initialize U to be an empty sequence of Unicode characters.
  8. -
  9. While i < n: -
      -
    1. Let c be the code unit in S at index i.
    2. -
    3. Depending on the value of c: -
      -
      c < 0xD800 or c > 0xDFFF
      -
      Append to U the Unicode character with code point c.
      - -
      0xDC00 ≤ c ≤ 0xDFFF
      -
      Append to U a U+FFFD REPLACEMENT CHARACTER.
      - -
      0xD800 ≤ c ≤ 0xDBFF
      -
      -
        -
      1. If i = n−1, then append to U a U+FFFD REPLACEMENT CHARACTER.
      2. -
      3. Otherwise, i < n−1: -
          -
        1. Let d be the code unit in S at index - i+1.
        2. -
        3. If 0xDC00 ≤ d ≤ 0xDFFF, then: -
            -
          1. Let a be c & 0x3FF.
          2. -
          3. Let b be d & 0x3FF.
          4. -
          5. Append to U the Unicode character with - code point 2^16 + 2^10·a + b.
          6. -
          7. Set i to i+1.
          8. -
          -
        4. -
        5. Otherwise, d < 0xDC00 or d > 0xDFFF. - Append to U a U+FFFD REPLACEMENT CHARACTER.
        6. -
        -
      4. -
      -
      -
      -
    4. -
    5. Set i to i+1.
    6. -
    -
  10. -
  11. Return U.
  12. -
-

- There is no way to represent a constant DOMString - value in IDL, although DOMString dictionary member - and operation optional argument default values - can be specified using a string literal. -

-

- The type name of the - DOMString type is “String”. -

-
- -
-

3.10.16 ByteString

- -

- The ByteString type - corresponds to the set of all possible sequences of bytes. - Such sequences might be interpreted as UTF-8 encoded strings [RFC3629] - or strings in some other 8-bit-per-code-unit encoding, although this is not required. -

-

- There is no way to represent a constant ByteString - value in IDL. -

-

- The type name of the - ByteString type is “ByteString”. -

-
Warning
-

- Specifications SHOULD only use - ByteString for interfacing with protocols - that use bytes and strings interchangeably, such as HTTP. In general, - strings SHOULD be represented with - DOMString values, even if it is expected - that values of the string will always be in ASCII or some - 8 bit character encoding. Sequences or Typed Arrays - with octet or byte - elements SHOULD be used for holding - 8 bit data rather than ByteString. - [TYPEDARRAYS] -

-
-
- -
-

3.10.17 USVString

- -

- The USVString type - corresponds to the set of all possible sequences of - Unicode scalar values, - which are all of the Unicode code points apart from the - surrogate code points. -

-

- There is no way to represent a constant USVString - value in IDL, although USVString dictionary member - and operation optional argument default values - can be specified using a string literal. -

-

- The type name of the - USVString type is “USVString”. -

-
Warning
-

- Specifications SHOULD only use - USVString for APIs that perform - text processing and need a string of Unicode - scalar values to operate on. Most APIs that use strings - should instead be using DOMString, - which does not make any interpretations of the code units - in the string. When in doubt, use DOMString. -

-
-
- -
-

3.10.18 object

- -

- The object type corresponds to the set of - all possible non-null object references. -

-

- There is no way to represent a constant object - value in IDL. -

-

- To denote a type that includes all possible object references plus the - null value, use the nullable type - object?. -

-

- The type name of the - object type is “Object”. -

-
- -
-

3.10.19 Interface types

- -

- An identifier that - identifies an interface is used to refer to - a type that corresponds to the set of all possible non-null references to objects that - implement that interface. -

-

- For non-callback interfaces, an IDL value of the interface type is represented just - by an object reference. For callback interfaces, an IDL value of the interface type - is represented by a tuple of an object reference and a callback context. - The callback context is a language - binding specific value, and is used to store information about the execution context at - the time the language binding specific object reference is converted to an IDL value. -

-
Note
-

For ECMAScript objects, the callback context is used - to hold a reference to the - incumbent script - [HTML] at the time the Object value - is converted to an IDL callback interface type value. See - section 4.2.20 .

-
-

- There is no way to represent a constant object reference value for - a particular interface type in IDL. -

-

- To denote a type that includes all possible references to objects implementing - the given interface plus the null value, - use a nullable type. -

-

- The type name of an interface type - is the identifier of the interface. -

-
- -
-

3.10.20 Dictionary types

- -

- An identifier that - identifies a dictionary is used to refer to - a type that corresponds to the set of all dictionaries that adhere to - the dictionary definition. -

-

- There is no way to represent a constant dictionary value in IDL. -

-

- The type name of a dictionary type - is the identifier of the dictionary. -

-
- -
-

3.10.21 Enumeration types

- -

- An identifier that - identifies an enumeration is used to - refer to a type whose values are the set of strings (sequences of - code units, as with - DOMString) that are the - enumeration’s values. -

-

- Like DOMString, there is no way to represent a constant enumeration - value in IDL, although enumeration-typed dictionary member - default values can be specified using a - string literal. -

-

- The type name of an enumeration type - is the identifier of the enumeration. -

-
- -
-

3.10.22 Callback function types

- -

- An identifier that identifies - a callback function is used to refer to - a type whose values are references to objects that are functions with the given signature. -

-

- An IDL value of the callback function type is represented by a tuple of an object - reference and a callback context. -

-
Note
-

As with callback interface types, the callback context is used - to hold a reference to the - incumbent script - [HTML] at the time an ECMAScript Object value - is converted to an IDL callback function type value. See - section 4.2.23 .

-
-

- There is no way to represent a constant callback function - value in IDL. -

-

- The type name of a callback function type - is the identifier of the callback function. -

-
- -
-

3.10.23 Nullable types — T?

- -

- A nullable type is an IDL type constructed - from an existing type (called the inner type), - which just allows the additional value null - to be a member of its set of values. Nullable types - are represented in IDL by placing a U+003F QUESTION MARK ("?") - character after an existing type. The inner type MUST NOT - be any, - another nullable type, or a union type - that itself includes a nullable type - or has a dictionary type as one of its - flattened member types. -

-
Note
-

Although dictionary types can in general be nullable, they cannot when used - as the type of an operation argument or a dictionary member.

-
-

- Nullable type constant values in IDL are represented in the same way that - constant values of their inner type - would be represented, or with the null token. -

-

- The type name of a nullable type - is the concatenation of the type name of the inner type T and - the string “OrNull”. -

-
Example
-

- For example, a type that allows the values true, - false and null - is written as boolean?: -

-
IDL
interface MyConstants {
-  const boolean? ARE_WE_THERE_YET = false;
-};
-

- The following interface has two - attributes: one whose value can - be a DOMString or the null - value, and another whose value can be a reference to a Node - object or the null value: -

-
IDL
interface Node {
-  readonly attribute DOMString? namespaceURI;
-  readonly attribute Node? parentNode;
-  // ...
-};
-
-
- -
-

3.10.24 Sequences — sequence<T>

- -

- The sequence<T> - type is a parameterized type whose values are (possibly zero-length) sequences of - values of type T. -

-

- Sequences are always passed by value. In - language bindings where a sequence is represented by an object of - some kind, passing a sequence to a platform object - will not result in a reference to the sequence being kept by that object. - Similarly, any sequence returned from a platform object - will be a copy and modifications made to it will not be visible to the platform object. -

-

- There is no way to represent a constant sequence value in IDL. -

-

- Sequences MUST NOT be used as the - type of an attribute or - constant. -

-
Note
-

- This restriction exists so that it is clear to specification writers - and API users that sequences - are copied rather than having references - to them passed around. Instead of a writable attribute of a sequence - type, it is suggested that a pair of operations to get and set the - sequence is used. -

-
-

- The type name of a sequence type - is the concatenation of the type name for T and - the string “Sequence”. -

-
- -
-

3.10.25 Promise types — Promise<T>

- -

- A promise type is a parameterized type - whose values are references to objects, each of which “is used as a place holder - for the eventual results of a deferred (and possibly asynchronous) computation” [ECMA-262]. - See section 25.4 - of the ECMAScript specification for details on the semantics of promise objects. -

-

- There is no way to represent a promise value in IDL. -

-

- The type name of a promise type - is the concatenation of the type name for T and - the string “Promise”. -

-
- -
-

3.10.26 Union types

- -

- A union type is a type whose set of values - is the union of those in two or more other types. Union types (matching - UnionType) - are written as a series of types separated by the or keyword - with a set of surrounding parentheses. - The types which comprise the union type are known as the - union’s member types. -

-
Note
-

- For example, you might write (Node or DOMString) - or (double or sequence<double>). When applying a - ? suffix to a - union type - as a whole, it is placed after the closing parenthesis, - as in (Node or DOMString)?. -

-

- Note that the member types - of a union type do not descend into nested union types. So for - (double or (sequence<long> or Event) or (Node or DOMString)?) the member types - are double, (sequence<long> or Event) and - (Node or DOMString)?. -

-
-

- Like the any type, values of - union types have a specific type, - which is the particular member type - that matches the value. -

-

- The flattened member types - of a union type is a set of types - determined as follows: -

-
    -
  1. Let T be the union type.
  2. -
  3. Initialize S to ∅.
  4. -
  5. For each member type U of T: -
      -
    1. If U is a nullable type, then - set U to be the inner type of U.
    2. -
    3. If U is a union type, then - add to S the flattened member types - of U.
    4. -
    5. Otherwise, U is not a union type. - Add U to S.
    6. -
    -
  6. -
  7. Return S.
  8. -
-
Note
-

- For example, the flattened member types - of the union type - (Node or (sequence<long> or Event) or (XMLHttpRequest or DOMString)? or sequence<(sequence<double> or NodeList)>) - are the six types Node, sequence<long>, Event, - XMLHttpRequest, DOMString and - sequence<(sequence<double> or NodeList)>. -

-
-

- The number of nullable member types - of a union type is an integer - determined as follows: -

-
    -
  1. Let T be the union type.
  2. -
  3. Initialize n to 0.
  4. -
  5. For each member type U of T: -
      -
    1. If U is a nullable type, then: -
        -
      1. Set n to n + 1.
      2. -
      3. Set U to be the inner type of U.
      4. -
      -
    2. -
    3. If U is a union type, then: -
        -
      1. Let m be the number - of nullable member types of U.
      2. -
      3. Set n to n + m.
      4. -
    4. -
    -
  6. -
  7. Return n.
  8. -
-

- The any type MUST NOT - be used as a union member type. -

-

- The number of nullable member types - of a union type MUST - be 0 or 1, and if it is 1 then the union type MUST also not have - a dictionary type in its - flattened member types. -

-

- A type includes a nullable type if: -

- -

- Each pair of flattened member types - in a union type, T and U, - MUST be distinguishable. -

-

- Union type constant values - in IDL are represented in the same way that constant values of their - member types would be - represented. -

-

- The type name of a union - type is formed by taking the type names of each member type, in order, - and joining them with the string “Or”. -

-
[75]UnionType"(" UnionMemberType "or" UnionMemberType UnionMemberTypes ")"
[76]UnionMemberTypeNonAnyType
 | - UnionType Null
[77]UnionMemberTypes"or" UnionMemberType UnionMemberTypes
 | - ε
[78]NonAnyTypePrimitiveType Null
 | - PromiseType Null
 | - "ByteString" Null
 | - "DOMString" Null
 | - "USVString" Null
 | - identifier Null
 | - "sequence" "<" Type ">" Null
 | - "object" Null
 | - "Error" Null
 | - "DOMException" Null
 | - BufferRelatedType Null
-
- - - -
-

3.10.27 Error

- -

The Error type corresponds to the - set of all possible non-null references to exception objects, including - simple exceptions - and DOMExceptions.

-

- There is no way to represent a constant Error - value in IDL. -

-

- The type name of the - Error type is “Error”. -

-
- -
-

3.10.28 DOMException

- -

The DOMException type corresponds to the - set of all possible non-null references to objects - representing DOMExceptions.

-

- There is no way to represent a constant DOMException - value in IDL. -

-

- The type name of the - DOMException type is “DOMException”. -

-
- -
-

3.10.29 Buffer source types

- -

- There are a number of types that correspond to sets of all possible non-null - references to objects that represent a buffer of data or a view on to a buffer of - data. The table below lists these types and the kind of buffer or view they represent. -

- - - - - - - - - - - - -
TypeKind of buffer
ArrayBufferAn object that holds a pointer (which may be null) to a buffer of a fixed number of bytes
DataViewA view on to an ArrayBuffer that allows typed access to integers and floating point values stored at arbitrary offsets into the buffer
- Int8Array,
- Int16Array,
- Int32Array
A view on to an ArrayBuffer that exposes it as an array of two’s complement signed integers of the given size in bits
- Uint8Array,
- Uint16Array,
- Uint32Array
A view on to an ArrayBuffer that exposes it as an array of unsigned integers of the given size in bits
Uint8ClampedArrayA view on to an ArrayBuffer that exposes it as an array of unsigned 8 bit integers with clamped conversions
- Float32Array,
- Float64Array
A view on to an ArrayBuffer that exposes it as an array of IEEE 754 floating point numbers of the given size in bits
-
Note
-

These types all correspond to classes defined in ECMAScript.

-
-

- To detach an ArrayBuffer - is to set its buffer pointer to null. -

-

- There is no way to represent a constant value of any of these types in IDL. -

-

- The type name of all - of these types is the name of the type itself. -

-

- At the specification prose level, IDL buffer source types - are simply references to objects. To inspect or manipulate the bytes inside the buffer, - specification prose MUST first either - get a reference to the bytes held by the buffer source - or get a copy of the bytes held by the buffer source. - With a reference to the buffer source’s bytes, specification prose can get or set individual - byte values using that reference. -

-
Warning
-

- Extreme care must be taken when writing specification text that gets a reference - to the bytes held by a buffer source, as the underlying data can easily be changed - by the script author or other APIs at unpredictable times. If you are using a buffer source type - as an operation argument to obtain a chunk of binary data that will not be modified, - it is strongly recommended to get a copy of the buffer source’s bytes at the beginning - of the prose defining the operation. -

-

- Requiring prose to explicitly get a reference to or copy of the bytes is intended to - help specification reviewers look for problematic uses of these buffer source types. -

-
-
Note
-

- When designing APIs that take a buffer, it is recommended to use the - BufferSource typedef rather than ArrayBuffer - or any of the view types. -

-

- When designing APIs that create and return a buffer, it is recommended - to use the ArrayBuffer type rather than - Uint8Array. -

-
-

- Attempting to get a reference to or - get a copy of the bytes held by a buffer source - when the ArrayBuffer has been detached - will fail in a language binding-specific manner. -

-
Note
-

See section 4.2.30 below for - how interacting with buffer source types works in the ECMAScript language binding.

-
- -
- - -
- -
-

3.11 Extended attributes

- -

- An extended attribute is an annotation - that can appear on - definitions, - interface members, - dictionary members, - and operation arguments, and - is used to control how language bindings will handle those constructs. - Extended attributes are specified with an - ExtendedAttributeList, - which is a square bracket enclosed, comma separated list of - ExtendedAttributes. -

-

- The ExtendedAttribute - grammar symbol matches nearly any sequence of tokens, however the - extended attributes - defined in this document only accept a more restricted syntax. - Any extended attribute encountered in an - IDL fragment is - - matched against the following five grammar symbols to determine - which form (or forms) it is in: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Grammar symbolFormExample
- ExtendedAttributeNoArgs - - takes no arguments - - [Replaceable] -
- ExtendedAttributeArgList - - takes an argument list - - [Constructor(double x, double y)] -
- ExtendedAttributeNamedArgList - - takes a named argument list - - [NamedConstructor=Image(DOMString src)] -
- ExtendedAttributeIdent - - takes an identifier - - [PutForwards=name] -
- ExtendedAttributeIdentList - - takes an identifier list - - [Exposed=(Window,Worker)] -
- -

- This specification defines a number of extended attributes that - are applicable to the ECMAScript language binding, which are described in - section 4.3 . - Each extended attribute definition will state which of the above - five forms are allowed. -

- -
[65]ExtendedAttributeList"[" ExtendedAttribute ExtendedAttributes "]"
 | - ε
[66]ExtendedAttributes"," ExtendedAttribute ExtendedAttributes
 | - ε
[67]ExtendedAttribute - "(" ExtendedAttributeInner ")" ExtendedAttributeRest -
 | - "[" ExtendedAttributeInner "]" ExtendedAttributeRest -
 | - "{" ExtendedAttributeInner "}" ExtendedAttributeRest -
 | - Other ExtendedAttributeRest -
[68]ExtendedAttributeRestExtendedAttribute
 | - ε
[69]ExtendedAttributeInner - "(" ExtendedAttributeInner ")" ExtendedAttributeInner -
 | - "[" ExtendedAttributeInner "]" ExtendedAttributeInner -
 | - "{" ExtendedAttributeInner "}" ExtendedAttributeInner -
 | - OtherOrComma ExtendedAttributeInner -
 | - ε -
[70]Other - integer
 | - float
 | - identifier
 | - string
 | - other -
 | - "-"
 | - "-Infinity"
 | - "."
 | - "..."
 | - ":"
 | - ";"
 | - "<"
 | - "="
 | - ">"
 | - "?" -
 | - "ByteString"
 | - "DOMString"
 | - "Infinity"
 | - "NaN"
 | - "USVString"
 | - "any"
 | - "boolean"
 | - "byte"
 | - "double"
 | - "false"
 | - "float" -
 | - "long"
 | - "null"
 | - "object"
 | - "octet"
 | - "or"
 | - "optional"
 | - "sequence" -
 | - "short"
 | - "true"
 | - "unsigned"
 | - "void" -
 | - ArgumentNameKeyword -
 | - BufferRelatedType -
[72]OtherOrCommaOther
 | - ","
[90]IdentifierListidentifier Identifiers
[91]Identifiers"," identifier Identifiers
 | - ε
[92]ExtendedAttributeNoArgsidentifier
[93]ExtendedAttributeArgListidentifier "(" ArgumentList ")"
[94]ExtendedAttributeIdentidentifier "=" identifier
[95]ExtendedAttributeIdentListidentifier "=" "(" IdentifierList ")"
[96]ExtendedAttributeNamedArgListidentifier "=" identifier "(" ArgumentList ")"
- -
-
- -
-

4. ECMAScript binding

- -

- This section describes how definitions written with the IDL defined in - section 3. correspond to particular constructs - in ECMAScript, as defined by the ECMAScript Language Specification 6th Edition - [ECMA-262]. -

-

- Objects defined in this section have internal properties as described in - ECMA-262 sections 9.1 and - 9.3.1 unless otherwise specified, in which case one or - more of the following are redefined in accordance with the rules for exotic objects: - [[Call]], - [[Set]], - [[DefineOwnProperty]], - [[GetOwnProperty]], - [[Delete]] and - [[HasInstance]]. -

-

- Unless otherwise specified, the [[Extensible]] internal property - of objects defined in this section has the value true. -

-

- Unless otherwise specified, the [[Prototype]] internal property - of objects defined in this section is the Object prototype object. -

-

- Some objects described in this section are defined to have a class string, - which is the string to include in the string returned from Object.prototype.toString. - If an object has a class string, then the object MUST, - at the time it is created, have a property whose name is the @@toStringTag symbol - and whose value is the specified string. -

- -

- If an object is defined to be a function object, then - it has characteristics as follows: -

- - -

- Algorithms in this section use the conventions described in ECMA-262 - section 5.2, such as the use of steps and substeps, the use of mathematical - operations, and so on. The - ToBoolean, - ToNumber, - ToUint16, - ToInt32, - ToUint32, - ToString, - ToObject, - IsAccessorDescriptor and - IsDataDescriptor abstract operations and the - Type(x) - notation referenced in this section are defined in ECMA-262 sections 6 and 7. -

-

- When an algorithm says to “throw a SomethingError” then this means to - construct a new ECMAScript SomethingError object and to throw it, - just as the algorithms in ECMA-262 do. -

-

- Note that algorithm steps can call in to other algorithms and abstract operations and - not explicitly handle exceptions that are thrown from them. When an exception - is thrown by an algorithm or abstract operation and it is not explicitly - handled by the caller, then it is taken to end the algorithm and propagate out - to its caller, and so on. -

-
Example
-

- Consider the following algorithm: -

-
    -
  1. Let x be the ECMAScript value passed in to this algorithm.
  2. -
  3. Let y be the result of calling ToString(x).
  4. -
  5. Return y.
  6. -
-

- Since ToString can throw an exception (for example if passed the object - ({ toString: function() { throw 1 } })), and the exception is - not handled in the above algorithm, if one is thrown then it causes this - algorithm to end and for the exception to propagate out to its caller, if there - is one. -

-
- -
-

4.1 ECMAScript environment

-

- In an ECMAScript implementation of a given set of - IDL fragments, - there will exist a number of ECMAScript objects that correspond to - definitions in those IDL fragments. - These objects are termed the initial objects, - and comprise the following: -

- -

- Each ECMAScript global environment ([ECMA-262], section 8.2) - MUST have its own unique set of each of - the initial objects, created - before control enters any ECMAScript execution context associated with the - environment, but after the global object for that environment is created. The [[Prototype]]s - of all initial objects in a given global environment MUST come from - that same global environment. -

-
Example
-

- In an HTML user agent, multiple global environments can exist when - multiple frames or windows are created. Each frame or window will have - its own set of initial objects, - which the following HTML document demonstrates: -

-
HTML
<!DOCTYPE html>
-<title>Different global environments</title>
-<iframe id=a></iframe>
-<script>
-var iframe = document.getElementById("a");
-var w = iframe.contentWindow;              // The global object in the frame
-
-Object == w.Object;                        // Evaluates to false, per ECMA-262
-Node == w.Node;                            // Evaluates to false
-iframe instanceof w.Node;                  // Evaluates to false
-iframe instanceof w.Object;                // Evaluates to false
-iframe.appendChild instanceof Function;    // Evaluates to true
-iframe.appendChild instanceof w.Function;  // Evaluates to false
-</script>
-
-

- Unless otherwise specified, each ECMAScript global environment exposes - all interfaces - that the implementation supports. If a given ECMAScript global environment does not - expose an interface, then the requirements given in - section 4.5 are - not followed for that interface. -

-
Note
-

- This allows, for example, ECMAScript global environments for Web Workers to expose - different sets of supported interfaces from those exposed in environments - for Web pages. -

-
-
- -
-

4.2 ECMAScript type mapping

- -

- This section describes how types in the IDL map to types in ECMAScript. -

-

- Each sub-section below describes how values of a given IDL type are represented - in ECMAScript. For each IDL type, it is described how ECMAScript values are - converted to an IDL value - when passed to a platform object expecting that type, and how IDL values - of that type are converted to ECMAScript values - when returned from a platform object. -

- -
-

4.2.1 any

- -

- Since the IDL any type - is the union of all other IDL types, it can correspond to any - ECMAScript value type. -

-

- How to convert an ECMAScript value to an IDL any value depends on the type of the - ECMAScript value: -

-
-
The undefined value
-
- The IDL value is an - object reference - to a special object that represents the ECMAScript - undefined value. -
-
The null value
-
- The IDL value is the null - object? reference. -
-
A Boolean value
-
- The IDL value is the - boolean - value that represents the same truth value. -
-
A Number value
-
- The IDL value is that which is obtained - by following the rules for converting the - Number to an IDL - unrestricted double value, - as described in section 4.2.15, - below. -
-
A String value
-
- The IDL value is that which is obtained - by following the rules for converting the - String to an IDL - DOMString value, - as described in section 4.2.16, - below. -
-
An object value
-
- The IDL value is an - object value that - references the same object. -
-
-

- An IDL any value is - converted to an ECMAScript value - as follows. If the value is an object - reference to a special object that represents an ECMAScript undefined - value, then it is converted to the ECMAScript - undefined value. Otherwise, - the rules for converting the specific type - of the IDL any value - as described in the remainder of this section are performed. -

-
- -
-

4.2.2 void

- -

- The only place that the void type may appear - in IDL is as the return type of an - operation. Functions on platform objects - that implement an operation whose IDL specifies a - void return type MUST return the - undefined value. -

-

- ECMAScript functions that implement an operation whose IDL - specifies a void return type - MAY return any value, which will be discarded. -

-
- -
-

4.2.3 boolean

- -

- An ECMAScript value V is - converted - to an IDL boolean value - by running the following algorithm: -

-
    -
  1. Let x be the result of computing ToBoolean(V).
  2. -
  3. Return the IDL boolean value that is the one that represents the same truth value as the ECMAScript Boolean value x.
  4. -
-

- The IDL boolean value true - is converted to - the ECMAScript true value and the IDL boolean - value false is converted to the ECMAScript - false value. -

-
- -
-

4.2.4 byte

- -

- An ECMAScript value V is - converted - to an IDL byte value - by running the following algorithm: -

-
    -
  1. Initialize x to ToNumber(V).
  2. -
  3. If the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. -
    3. Set x to sign(x) * floor(abs(x)).
    4. -
    5. If x < −2^{7} or x > 2^{7} − 1, then throw a TypeError.
    6. -
    7. Return the IDL byte value that represents the same numeric value as x.
    8. -
    -
  4. -
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. Set x to min(max(x, −2^{7}), 2^{7} − 1).
    2. -
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. -
    5. Return the IDL byte value that represents the same numeric value as x.
    6. -
    -
  6. -
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL byte value that represents 0.
  8. -
  9. Set x to sign(x) * floor(abs(x)).
  10. -
  11. Set x to x modulo 2^{8}.
  12. -
  13. If x ≥ 2^{7}, return the IDL byte value that represents the same numeric value as x − 2^{8}. - Otherwise, return the IDL byte value that represents the same numeric value as x.
  14. -
-

- The result of converting - an IDL byte value to an ECMAScript - value is a Number that represents - the same numeric value as the IDL byte value. - The Number value will be an integer in the range [−128, 127]. -

-
- -
-

4.2.5 octet

- -

- An ECMAScript value V is - converted - to an IDL octet value - by running the following algorithm: -

-
    -
  1. Initialize x to ToNumber(V).
  2. -
  3. If the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. -
    3. Set x to sign(x) * floor(abs(x)).
    4. -
    5. If x < 0 or x > 2^{8} − 1, then throw a TypeError.
    6. -
    7. Return the IDL octet value that represents the same numeric value as x.
    8. -
    -
  4. -
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. Set x to min(max(x, 0), 2^{8} − 1).
    2. -
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. -
    5. Return the IDL octet value that represents the same numeric value as x.
    6. -
    -
  6. -
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL octet value that represents 0.
  8. -
  9. Set x to sign(x) * floor(abs(x)).
  10. -
  11. Set x to x modulo 2^{8}.
  12. -
  13. Return the IDL octet value that represents the same numeric value as x.
  14. -
-

- The result of converting - an IDL octet value to an ECMAScript - value is a Number that represents - the same numeric value as the IDL - octet value. - The Number value will be an integer in the range [0, 255]. -

-
- -
-

4.2.6 short

- -

- An ECMAScript value V is - converted - to an IDL short value - by running the following algorithm: -

-
    -
  1. Initialize x to ToNumber(V).
  2. -
  3. If the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. -
    3. Set x to sign(x) * floor(abs(x)).
    4. -
    5. If x < −2^{15} or x > 2^{15} − 1, then throw a TypeError.
    6. -
    7. Return the IDL short value that represents the same numeric value as x.
    8. -
    -
  4. -
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. Set x to min(max(x, −2^{15}), 2^{15} − 1).
    2. -
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. -
    5. Return the IDL short value that represents the same numeric value as x.
    6. -
    -
  6. -
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL short value that represents 0.
  8. -
  9. Set x to sign(x) * floor(abs(x)).
  10. -
  11. Set x to x modulo 2^{16}.
  12. -
  13. If x ≥ 2^{15}, return the IDL short value that represents the same numeric value as x − 2^{16}. - Otherwise, return the IDL short value that represents the same numeric value as x.
  14. -
-

- The result of converting - an IDL short value to an ECMAScript - value is a Number that represents the - same numeric value as the IDL - short value. - The Number value will be an integer in the range [−32768, 32767]. -

-
- -
-

4.2.7 unsigned short

- -

- An ECMAScript value V is - converted - to an IDL unsigned short value - by running the following algorithm: -

-
    -
  1. Initialize x to ToNumber(V).
  2. -
  3. If the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. -
    3. Set x to sign(x) * floor(abs(x)).
    4. -
    5. If x < 0 or x > 2^{16} − 1, then throw a TypeError.
    6. -
    7. Return the IDL unsigned short value that represents the same numeric value as x.
    8. -
    -
  4. -
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. Set x to min(max(x, 0), 2^{16} − 1).
    2. -
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. -
    5. Return the IDL unsigned short value that represents the same numeric value as x.
    6. -
    -
  6. -
  7. Set x to ToUint16(x).
  8. -
  9. Return the IDL unsigned short value that represents the same numeric value as x.
  10. -
-

- The result of converting - an IDL unsigned short value to an ECMAScript - value is a Number that - represents the same numeric value as the IDL - unsigned short value. - The Number value will be an integer in the range [0, 65535]. -

-
- -
-

4.2.8 long

- -

- An ECMAScript value V is - converted - to an IDL long value - by running the following algorithm: -

-
    -
  1. Initialize x to ToNumber(V).
  2. -
  3. If the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. -
    3. Set x to sign(x) * floor(abs(x)).
    4. -
    5. If x < −2^{31} or x > 2^{31} − 1, then throw a TypeError.
    6. -
    7. Return the IDL long value that represents the same numeric value as x.
    8. -
    -
  4. -
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. Set x to min(max(x, −2^{31}), 2^{31} − 1).
    2. -
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. -
    5. Return the IDL long value that represents the same numeric value as x.
    6. -
    -
  6. -
  7. Set x to ToInt32(x).
  8. -
  9. Return the IDL long value that represents the same numeric value as x.
  10. -
-

- The result of converting - an IDL long value to an ECMAScript - value is a Number that - represents the same numeric value as the IDL - long value. - The Number value will be an integer in the range [−2147483648, 2147483647]. -

-
- -
-

4.2.9 unsigned long

- -

- An ECMAScript value V is - converted - to an IDL unsigned long value - by running the following algorithm: -

-
    -
  1. Initialize x to ToNumber(V).
  2. -
  3. If the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. -
    3. Set x to sign(x) * floor(abs(x)).
    4. -
    5. If x < 0 or x > 2^{32} − 1, then throw a TypeError.
    6. -
    7. Return the IDL unsigned long value that represents the same numeric value as x.
    8. -
    -
  4. -
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. Set x to min(max(x, 0), 2^{32} − 1).
    2. -
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. -
    5. Return the IDL unsigned long value that represents the same numeric value as x.
    6. -
    -
  6. -
  7. Set x to ToUint32(x).
  8. -
  9. Return the IDL unsigned long value that represents the same numeric value as x.
  10. -
-

- The result of converting - an IDL unsigned long value to an ECMAScript - value is a Number that - represents the same numeric value as the IDL - unsigned long value. - The Number value will be an integer in the range [0, 4294967295]. -

-
- -
-

4.2.10 long long

- -

- An ECMAScript value V is - converted - to an IDL long long value - by running the following algorithm: -

-
    -
  1. Initialize x to ToNumber(V).
  2. -
  3. If the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. -
    3. Set x to sign(x) * floor(abs(x)).
    4. -
    5. If x < −2^{53} + 1 or x > 2^{53} − 1, then throw a TypeError.
    6. -
    7. Return the IDL long long value that represents the same numeric value as x.
    8. -
    -
  4. -
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. Set x to min(max(x, −2^{53} + 1), 2^{53} − 1).
    2. -
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. -
    5. Return the IDL long long value that represents the same numeric value as x.
    6. -
    -
  6. -
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL long long value that represents 0.
  8. -
  9. Set x to sign(x) * floor(abs(x)).
  10. -
  11. Set x to x modulo 2^{64}.
  12. -
  13. If x is greater than or equal to 2^{63}, then set x to x − 2^{64}.
  14. -
  15. Return the IDL long long value that represents the same numeric value as x.
  16. -
-

- The result of converting - an IDL long long value to an ECMAScript - value is a Number value that - represents the closest numeric value to the long long, - choosing the numeric value with an even significand if there are - two equally close values ( - [ECMA-262] - , section 6.1.6). - If the long long is in the range - [−2^{53} + 1, 2^{53} − 1], then the Number - will be able to represent exactly the same value as the - long long. -

-
- -
-

4.2.11 unsigned long long

- -

- An ECMAScript value V is - converted - to an IDL unsigned long long value - by running the following algorithm: -

-
    -
  1. Initialize x to ToNumber(V).
  2. -
  3. If the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. If x is NaN, +∞, or −∞, then throw a TypeError.
    2. -
    3. Set x to sign(x) * floor(abs(x)).
    4. -
    5. If x < 0 or x > 2^{53} − 1, then throw a TypeError.
    6. -
    7. Return the IDL unsigned long long value that represents the same numeric value as x.
    8. -
    -
  4. -
  5. If x is not NaN and the conversion to an IDL value is being performed due to any of the following: - - then: -
      -
    1. Set x to min(max(x, 0), 2^{53} − 1).
    2. -
    3. Round x to the nearest integer, choosing the even integer if it lies halfway between two, and choosing +0 rather than −0.
    4. -
    5. Return the IDL unsigned long long value that represents the same numeric value as x.
    6. -
    -
  6. -
  7. If x is NaN, +0, −0, +∞, or −∞, then return the IDL unsigned long long value that represents 0.
  8. -
  9. Set x to sign(x) * floor(abs(x)).
  10. -
  11. Set x to x modulo 2^{64}.
  12. -
  13. Return the IDL unsigned long long value that represents the same numeric value as x.
  14. -
-

- The result of converting - an IDL unsigned long long value to an ECMAScript - value is a Number value that - represents the closest numeric value to the unsigned long long, - choosing the numeric value with an even significand if there are - two equally close values ( - [ECMA-262] - , section 6.1.6). - If the unsigned long long is less than or equal to 2^{53} − 1, - then the Number will be able to - represent exactly the same value as the - unsigned long long. -

-
- -
-

4.2.12 float

- -

- An ECMAScript value V is - converted - to an IDL float value - by running the following algorithm: -

-
    -
  1. Let x be ToNumber(V).
  2. -
  3. If x is NaN, +Infinity or - −Infinity, then throw a TypeError.
  4. -
  5. - Let S be the set of finite IEEE 754 single-precision floating - point values except −0, but with two special values added: 2^{128} and - −2^{128}. -
  6. -
  7. - Let y be the number in S that is closest - to x, selecting the number with an - even significand if there are two equally close values ( - [ECMA-262] - , section 6.1.6). - (The two special values 2^{128} and −2^{128} - are considered to have even significands for this purpose.) -
  8. -
  9. - If y is 2^{128} or −2^{128}, then throw a TypeError. -
  10. -
  11. - If y is +0 and x is negative, return −0. -
  12. -
  13. - Return y. -
  14. -
-

- The result of converting - an IDL float value to an ECMAScript - value is the Number value that represents the same numeric value as the IDL - float value. -

-
- -
-

4.2.13 unrestricted float

- -

- An ECMAScript value V is - converted - to an IDL unrestricted float value - by running the following algorithm: -

-
    -
  1. Let x be ToNumber(V).
  2. -
  3. If x is NaN, then return the IDL unrestricted float value that represents the IEEE 754 NaN value with the bit pattern 0x7fc00000 [IEEE-754].
  4. -
  5. - Let S be the set of finite IEEE 754 single-precision floating - point values except −0, but with two special values added: 2^{128} and - −2^{128}. -
  6. -
  7. - Let y be the number in S that is closest - to x, selecting the number with an - even significand if there are two equally close values ( - [ECMA-262] - , section 6.1.6). - (The two special values 2^{128} and −2^{128} - are considered to have even significands for this purpose.) -
  8. -
  9. - If y is 2^{128}, return +∞. -
  10. -
  11. - If y is −2^{128}, return −∞. -
  12. -
  13. - If y is +0 and x is negative, return −0. -
  14. -
  15. - Return y. -
  16. -
-
Note
-

- Since there is only a single ECMAScript NaN value, - it must be canonicalized to a particular single precision IEEE 754 NaN value. The NaN value - mentioned above is chosen simply because it is the quiet NaN with the lowest - value when its bit pattern is interpreted as an unsigned 32 bit integer. -

-
-

- The result of converting - an IDL unrestricted float value to an ECMAScript - value is a Number: -

-
    -
  • - If the IDL unrestricted float value is a NaN, - then the Number value is NaN. -
  • -
  • - Otherwise, the Number value is - the one that represents the same numeric value as the IDL - unrestricted float value. -
  • -
-
- -
-

4.2.14 double

- -

- An ECMAScript value V is - converted - to an IDL double value - by running the following algorithm: -

-
    -
  1. Let x be ToNumber(V).
  2. -
  3. If x is NaN, +Infinity or - −Infinity, then throw a TypeError.
  4. -
  5. - Return the IDL double value - that has the same numeric value as x. -
  6. -
-

- The result of converting - an IDL double value to an ECMAScript - value is the Number value that represents the - same numeric value as the IDL double value. -

-
- -
-

4.2.15 unrestricted double

- -

- An ECMAScript value V is - converted - to an IDL unrestricted double value - by running the following algorithm: -

-
    -
  1. Let x be ToNumber(V).
  2. -
  3. If x is NaN, then return the IDL unrestricted double value that represents the IEEE 754 NaN value with the bit pattern 0x7ff8000000000000 [IEEE-754].
  4. -
  5. - Return the IDL unrestricted double value - that has the same numeric value as x. -
  6. -
-
Note
-

- Since there is only a single ECMAScript NaN value, - it must be canonicalized to a particular double precision IEEE 754 NaN value. The NaN value - mentioned above is chosen simply because it is the quiet NaN with the lowest - value when its bit pattern is interpreted as an unsigned 64 bit integer. -

-
-

- The result of converting - an IDL unrestricted double value to an ECMAScript - value is a Number: -

-
    -
  • - If the IDL unrestricted double value is a NaN, - then the Number value is NaN. -
  • -
  • - Otherwise, the Number value is - the one that represents the same numeric value as the IDL - unrestricted double value. -
  • -
-
- -
-

4.2.16 DOMString

- -

- An ECMAScript value V is - converted - to an IDL DOMString value - by running the following algorithm: -

-
    -
  1. If V is null - and the conversion to an IDL value is being performed due - to any of the following: - - then return the DOMString - value that represents the empty string. -
  2. -
  3. Let x be ToString(V).
  4. -
  5. Return the IDL DOMString value that represents the same sequence of code units as the one the ECMAScript String value x represents.
  6. -
-

- The result of converting - an IDL DOMString value to an ECMAScript - value is the String - value that represents the same sequence of code units that the - IDL DOMString represents. -

-
- -
-

4.2.17 ByteString

- -

- An ECMAScript value V is - converted - to an IDL ByteString value - by running the following algorithm: -

-
    -
  1. Let x be ToString(V).
  2. -
  3. If the value of any element - of x is greater than 255, then throw a TypeError.
  4. -
  5. Return an IDL ByteString value - whose length is the length of x, and where the value of each element is - the value of the corresponding element of x.
  6. -
-

- The result of converting - an IDL ByteString value to an ECMAScript - value is a String - value whose length is the length of the ByteString, - and the value of each element of which is the value of the corresponding element - of the ByteString. -

-
- -
-

4.2.18 USVString

- -

- An ECMAScript value V is - converted - to an IDL USVString value - by running the following algorithm: -

-
    -
  1. Let string be the result of converting V - to a DOMString.
  2. -
  3. Return an IDL USVString value - that is the result of converting string to a sequence of Unicode scalar values.
  4. -
-

- An IDL USVString value is - converted - to an ECMAScript value by running the following algorithm: -

-
    -
  1. Let scalarValues be the sequence of Unicode scalar values the USVString represents.
  2. -
  3. Let string be the sequence of code units that results from encoding scalarValues in UTF-16.
  4. -
  5. Return the String value that represents the same sequence of code units as string.
  6. -
-
- -
-

4.2.19 object

- -

- IDL object - values are represented by ECMAScript Object values. -

-

- An ECMAScript value V is - converted - to an IDL object value - by running the following algorithm: -

-
    -
  1. If Type(V) is not Object, then throw a TypeError.
  2. -
  3. Return the IDL object value that is a reference to the same object as V.
  4. -
-

- The result of converting - an IDL object value to an ECMAScript - value is the Object value that represents a reference to the same object that the - IDL object represents. -

-
- -
-

4.2.20 Interface types

- -

- IDL interface type - values are represented by ECMAScript Object or - Function values. -

-

- An ECMAScript value V is - converted - to an IDL interface type value - by running the following algorithm (where I is the interface): -

-
    -
  1. If Type(V) is not Object, then throw a TypeError.
  2. -
  3. If V is a platform object that implements I, then return the IDL interface type value that represents a reference to that platform object.
  4. -
  5. If V is a user object - that is considered to implement I according to the rules in - section 4.8 , - then return the IDL interface type value that represents a reference to that user object, - with the incumbent script - as the callback context. [HTML]
  6. -
  7. Throw a TypeError.
  8. -
-

- The result of converting - an IDL interface type - value to an ECMAScript value is the Object - value that represents a reference to the same object that the IDL - interface type value represents. -

-
- -
-

4.2.21 Dictionary types

- -

- IDL dictionary type values are represented - by ECMAScript Object values. Properties on - the object (or its prototype chain) correspond to dictionary members. -

-

- An ECMAScript value V is - converted - to an IDL dictionary type value - by running the following algorithm (where D is the dictionary): -

-
    -
  1. If Type(V) is not Undefined, Null or Object, then throw a TypeError.
  2. -
  3. If V is a native RegExp object, then throw a TypeError.
  4. -
  5. Let dict be an empty dictionary value of type D; - every dictionary member - is initially considered to be not present.
  6. -
  7. Let dictionaries be a list consisting of D and all of D’s inherited dictionaries, - in order from least to most derived.
  8. -
  9. For each dictionary dictionary in dictionaries, in order: -
      -
    1. For each dictionary member member declared on dictionary, in lexicographical order: -
        -
      1. Let key be the identifier of member.
      2. -
      3. Let value be an ECMAScript value, depending on Type(V): -
        -
        Undefined
        -
        Null
        -
        value is undefined.
        -
        anything else
        -
        value is the result of calling the [[Get]] internal method on V with property name key.
        -
        -
      4. -
      5. If value is not undefined, then: -
          -
        1. Let idlValue be the result of converting value to an IDL value whose type is the type member is declared to be of.
        2. -
        3. Set the dictionary member on dict with key name key to the value idlValue. This dictionary member is considered to be present.
        4. -
        -
      6. -
      7. Otherwise, if value is undefined but the dictionary member has a default value, then: -
          -
        1. Let idlValue be the dictionary member’s default value.
        2. -
        3. Set the dictionary member on dict with key name key to the value idlValue. This dictionary member is considered to be present.
        4. -
        -
      8. -
      9. - Otherwise, if value is - undefined and the - dictionary - member is a - required dictionary - member, then throw a TypeError. -
      10. -
      -
    2. -
    -
  10. -
  11. Return dict.
  12. -
-
Note
-

- The order in which dictionary members are looked - up on the ECMAScript object is not necessarily the same as the object’s property enumeration order. -

-
-

- An IDL dictionary value V is - converted - to an ECMAScript Object value - by running the following algorithm (where D is the dictionary): -

-
    -
  1. Let O be a new Object value created as if by the expression ({}).
  2. -
  3. Let dictionaries be a list consisting of D and all of D’s inherited dictionaries, - in order from least to most derived.
  4. -
  5. For each dictionary dictionary in dictionaries, in order: -
      -
    1. For each dictionary member member declared on dictionary, in lexicographical order: -
        -
      1. Let key be the identifier of member.
      2. -
      3. If the dictionary member named key is present in V, then: -
          -
        1. Let idlValue be the value of member on V.
        2. -
        3. Let value be the result of converting idlValue to an ECMAScript value.
        4. -
        5. Call the [[DefineOwnProperty]] internal method on O with property name key, - Property Descriptor { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true, [[Value]]: value } - and Boolean flag false.
        6. -
        -
      4. -
      -
    2. -
    -
  6. -
  7. Return O.
  8. -
-
- -
-

4.2.22 Enumeration types

- -

- IDL enumeration types are represented by ECMAScript String - values. -

-

- An ECMAScript value V is - converted - to an IDL enumeration type - value as follows (where E is the enumeration): -

-
    -
  1. Let S be the result of calling ToString(V).
  2. -
  3. If S is not one of E’s enumeration values, then throw a TypeError.
  4. -
  5. Return the enumeration value of type E that is equal to S.
  6. -
-

- The result of converting - an IDL enumeration type value to an ECMAScript - value is the String - value that represents the same sequence of code units as - the enumeration value. -

-
- -
-

4.2.23 Callback function types

- -

- IDL callback function types are represented by ECMAScript Function - objects, except in the [TreatNonObjectAsNull] case, - when they can be any object. -

-

- An ECMAScript value V is - converted - to an IDL callback function type value - by running the following algorithm: -

-
    -
  1. If the result of calling IsCallable(V) is false and the conversion to an IDL value - is not being performed due - to V being assigned to an attribute - whose type is a nullable - callback function - that is annotated with [TreatNonObjectAsNull], - then throw a TypeError.
  2. -
  3. Return the IDL callback - function type value that represents a reference to the same - object that V represents, - with the incumbent script - as the callback context. [HTML].
  4. -
-

- The result of converting - an IDL callback function type - value to an ECMAScript value is a reference to the same object - that the IDL callback function type value represents. -

-
- -
-

4.2.24 Nullable types — T?

- -

- IDL nullable type values are represented - by values of either the ECMAScript type corresponding to the inner IDL type, or - the ECMAScript null value. -

-

- An ECMAScript value V is - converted - to an IDL nullable type T? - value (where T is the inner type) as follows: -

-
    -
  1. - If Type(V) is not Object, and - the conversion to an IDL value is being performed due - to V being assigned to an attribute - whose type is a nullable - callback function - that is annotated with [TreatNonObjectAsNull], - then return the IDL - nullable type T? - value null. -
  2. -
  3. - Otherwise, if V is null or undefined, then return the IDL - nullable type T? - value null. -
  4. -
  5. - Otherwise, return the result of - converting V - using the rules for the inner IDL type T. -
  6. -
-

- The result of converting - an IDL nullable type value to an ECMAScript value is: -

- -
- -
-

4.2.25 Sequences — sequence<T>

- -

- IDL sequence<T> values are represented by - ECMAScript Array values. -

-

- An ECMAScript value V is converted - to an IDL sequence<T> value as follows: -

-
    -
  1. - If V is not an object, - throw a - TypeError. -
  2. -
  3. - If V is a native RegExp object, - throw a - TypeError. -
  4. -
  5. - Let method be the result of - GetMethod(V, @@iterator). -
  6. -
  7. - ReturnIfAbrupt(method). -
  8. -
  9. - If method is undefined, - throw a - TypeError. -
  10. -
  11. - Return the result of - creating a sequence - of type sequence<T> - from V and method. -
  12. -
- -

- An IDL sequence value S of type - sequence<T> is - converted - to an ECMAScript Array object as follows: -

-
    -
  1. Let n be the length of S.
  2. -
  3. Let A be a new Array object created as if by the expression [].
  4. -
  5. Initialize i to be 0.
  6. -
  7. While i < n: -
      -
    1. Let V be the value in S at index i.
    2. -
    3. Let E be the result of converting - V to an ECMAScript value.
    4. -
    5. Let P be the result of calling ToString(i).
    6. -
    7. Call the [[DefineOwnProperty]] internal method on A with property name P, - Property Descriptor { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true, [[Value]]: E } - and Boolean flag false.
    8. -
    9. Set i to i + 1.
    10. -
    -
  8. -
  9. Return A.
  10. -
- -
-
4.2.25.1 Creating a sequence from an iterable
-

- To create an IDL value of type sequence<T> given an - iterable iterable and an iterator getter - method, perform the following steps: -

-
    -
  1. - Let iter be - GetIterator(iterable, method). -
  2. -
  3. - ReturnIfAbrupt(iter). -
  4. -
  5. Initialize i to be 0.
  6. -
  7. Repeat -
      -
    1. - Let next be IteratorStep(iter). -
    2. -
    3. ReturnIfAbrupt(next).
    4. -
    5. - If next is false, - then return an IDL sequence value of type - sequence<T> - of length i, where the value of the element - at index j is - S_j. -
    6. -
    7. - Let nextItem be - IteratorValue(next). -
    8. -
    9. ReturnIfAbrupt(nextItem).
    10. -
    11. - Initialize S_i to the result of - converting - nextItem to an IDL value of type T. -
    12. -
    13. Set i to i + 1.
    14. -
    -
  8. -
-
-
Example
-

- The following interface defines - an attribute of a sequence - type as well as an operation - with an argument of a sequence type. -

-
IDL
interface Canvas {
-
-  sequence<DOMString> getSupportedImageCodecs();
-
-  void drawPolygon(sequence<double> coordinates);
-  sequence<double> getLastDrawnPolygon();
-
-  // ...
-};
-

- In an ECMAScript implementation of this interface, an Array - object with elements of type String is used to - represent a sequence<DOMString>, while an - Array with elements of type Number - represents a sequence<double>. The - Array objects are effectively passed by - value; every time the getSupportedImageCodecs() - function is called a new Array is - returned, and whenever an Array is - passed to drawPolygon no reference - will be kept after the call completes. -

-
ECMAScript

-// Obtain an instance of Canvas.  Assume that getSupportedImageCodecs()
-// returns a sequence with two DOMString values: "image/png" and "image/svg+xml".
-var canvas = getCanvas();
-
-// An Array object of length 2.
-var supportedImageCodecs = canvas.getSupportedImageCodecs();
-
-// Evaluates to "image/png".
-supportedImageCodecs[0];
-
-// Each time canvas.getSupportedImageCodecs() is called, it returns a
-// new Array object.  Thus modifying the returned Array will not
-// affect the value returned from a subsequent call to the function.
-supportedImageCodecs[0] = "image/jpeg";
-
-// Evaluates to "image/png".
-canvas.getSupportedImageCodecs()[0];
-
-// This evaluates to false, since a new Array object is returned each call.
-canvas.getSupportedImageCodecs() == canvas.getSupportedImageCodecs();
-
-
-// An Array of Numbers...
-var a = [0, 0, 100, 0, 50, 62.5];
-
-// ...can be passed to a platform object expecting a sequence<double>.
-canvas.drawPolygon(a);
-
-// Each element will be converted to a double by first calling ToNumber().
-// So the following call is equivalent to the previous one, except that
-// "hi" will be alerted before drawPolygon() returns.
-a = [false, '',
-     { valueOf: function() { alert('hi'); return 100; } }, 0,
-     '50', new Number(62.5)];
-canvas.drawPolygon(a);
-
-// Modifying an Array that was passed to drawPolygon() is guaranteed not to
-// have an effect on the Canvas, since the Array is effectively passed by value.
-a[4] = 20;
-var b = canvas.getLastDrawnPolygon();
-alert(b[4]);    // This would alert "50".
-
- -
- -
-

4.2.26 Promise types — Promise<T>

- -

- IDL promise type values are - represented by ECMAScript Promise - objects. -

-

- An ECMAScript value V is - converted - to an IDL Promise<T> value as follows: -

-
    -
  1. Let resolve be the original value of %Promise%.resolve. - -
  2. -
  3. Let promise be the result of calling resolve with %Promise% - as the this value and V as the single argument value.
  4. -
  5. Return the IDL promise type value that is a reference to the - same object as promise.
  6. -
-

- The result of converting - an IDL promise type value to an ECMAScript - value is the Promise value that represents a reference to the same object that the - IDL promise type represents. -

-

- One can perform some steps once a promise is settled. - There can be one or two sets of steps to perform, covering when the promise is fulfilled, rejected, or both. - When a specification says to perform some steps once a promise is settled, the following steps - MUST be followed: -

-
    -
  1. Let promise be the promise object of type Promise<T>.
  2. -
  3. - Let onFulfilled be a new function object whose - behavior when invoked is as follows: -
      -
    1. If T is void, then: -
      1. Return the result of performing any steps that were required to be run if the promise was fulfilled.
    2. -
    3. Otherwise, T is a type other than void: -
        -
      1. Let V be the first argument to onFulfilled.
      2. -
      3. Let value be the result of converting - V to an IDL value of type T.
      4. -
      5. If there are no steps that are required to be run if the promise was fulfilled, then - return undefined.
      6. -
      7. Otherwise, return the result of performing any steps that were required to be run if the promise was fulfilled, - with value as the promise’s value.
      8. -
      -
    4. -
    -
  4. -
  5. - Let onRejected be a new function object whose - behavior when invoked is as follows: -
      -
    1. Let R be the first argument to onRejected.
    2. -
    3. Let reason be the result of converting - R to an IDL value of type any.
    4. -
    5. If there are no steps that are required to be run if the promise was rejected, then - return undefined.
    6. -
    7. Otherwise, return the result of performing any steps that were required to be run if the promise was rejected, - with reason as the rejection reason.
    8. -
    -
  6. -
  7. Let then be the result of calling the internal [[Get]] method of promise with property name “then”.
  8. -
  9. If then is not callable, then throw a TypeError.
  10. -
  11. Return the result of calling then with promise as the this value and onFulfilled and onRejected - as its two arguments.
  12. -
- -
- -
-

4.2.27 Union types

- -

- IDL union type values are - represented by ECMAScript values that correspond to the union’s - member types. -

-

- An ECMAScript value V is converted to an IDL union type - value as follows: -

-
    -
  1. If the union type - includes a nullable type and - V is null or undefined, - then return the IDL value null.
  2. -
  3. - Let types be the flattened member types - of the union type. -
  4. -
  5. - If V is a platform object, but not a - platform array object, then: -
      -
    1. If types includes an interface type that V - implements, then return the IDL value that is a reference to the object V.
    2. -
    3. If types includes object, then return the IDL value - that is a reference to the object V.
    4. -
    -
  6. -
  7. - If V is an object, then: -
      - -
    1. If types includes object, then return the IDL value - that is a reference to the object V.
    2. -
    -
  8. -
  9. - If V is a DOMException platform object, then: -
      -
    1. If types includes DOMException or - Error, then return the - result of converting - V to that type.
    2. -
    3. If types includes object, then return the IDL value - that is a reference to the object V.
    4. -
    -
  10. -
  11. - If V is a native Error object (that is, it has an [[ErrorData]] internal slot), then: -
      -
    1. If types includes Error, then return the - result of converting - V to Error.
    2. -
    3. If types includes object, then return the IDL value - that is a reference to the object V.
    4. -
    -
  12. -
  13. - If V is an object with an [[ArrayBufferData]] internal slot, then: -
      -
    1. If types includes ArrayBuffer, then return the - result of converting - V to ArrayBuffer.
    2. -
    3. If types includes object, then return the IDL value - that is a reference to the object V.
    4. -
    -
  14. -
  15. - If V is an object with a [[DataView]] internal slot, then: -
      -
    1. If types includes DataView, then return the - result of converting - V to DataView.
    2. -
    3. If types includes object, then return the IDL value - that is a reference to the object V.
    4. -
    -
  16. -
  17. - If V is an object with a [[TypedArrayName]] internal slot, then: -
      -
    1. If types includes a typed array type - whose name is the value of V’s [[TypedArrayName]] internal slot, then return the - result of converting - V to that type.
    2. -
    3. If types includes object, then return the IDL value - that is a reference to the object V.
    4. -
    -
  18. -
  19. - If IsCallable(V) is true, then: -
      -
    1. If types includes a callback function - type, then return the result of - converting - V to that callback function type.
    2. -
    3. If types includes object, then return the IDL value - that is a reference to the object V.
    4. -
    -
  20. -
  21. If V is null or undefined object, then: -
      -
    1. If types includes a dictionary type, then return the - result of converting - V to that dictionary type.
    2. -
    -
  22. -
  23. - If V is an object, then: -
      -
    1. - If types includes a sequence type, then -
        -
      1. - Let method be the result of - GetMethod(V, @@iterator). -
      2. -
      3. - ReturnIfAbrupt(method). -
      4. -
      5. - If method is not - undefined, - return the result of - creating a - sequence of that type from V and - method. -
      6. -
      -
    2. - -
    3. If types includes a callback interface - type, then return the result of - converting - V to that interface type.
    4. -
    5. If types includes object, then return the IDL value - that is a reference to the object V.
    6. -
    -
  24. -
  25. - If V is a Boolean value, then: -
      -
    1. - If types includes a boolean, - then return the result of converting - V to boolean. -
    2. -
    -
  26. -
  27. - If V is a Number value, then: -
      -
    1. - If types includes a numeric type, - then return the result of converting - V to that numeric type. -
    2. -
    -
  28. -
  29. - If types includes a string type, - then return the result of - converting - V to that type. -
  30. -
  31. - If types includes a numeric type, - then return the result of converting - V to that numeric type. -
  32. -
  33. - If types includes a boolean, - then return the result of converting - V to boolean. -
  34. -
  35. - Throw a TypeError. -
  36. -
-

- An IDL union type value is - converted to an ECMAScript value - as follows. If the value is an object - reference to a special object that represents an ECMAScript undefined - value, then it is converted to the ECMAScript - undefined value. Otherwise, - the rules for converting the specific type - of the IDL union type value, as described in this section (4.2), are used. -

-
- - - -
-

4.2.28 Error

- -

- IDL Error values are represented - by native ECMAScript Error objects and - platform objects for DOMExceptions. -

-

- An ECMAScript value V is - converted - to an IDL Error value - by running the following algorithm: -

-
    -
  1. If Type(V) is not Object, - or V does not have an [[ErrorData]] internal slot, then throw a TypeError.
  2. -
  3. Return the IDL Error value that is a reference to the same object as V.
  4. -
-

- The result of converting - an IDL Error value to an ECMAScript - value is the Error value that represents a reference to the same object that the - IDL Error represents. -

-
- -
-

4.2.29 DOMException

- -

- IDL DOMException values are represented - by platform objects for DOMExceptions. -

-

- An ECMAScript value V is - converted - to an IDL DOMException value - by running the following algorithm: -

-
    -
  1. If Type(V) is not Object, - or V is not a platform object that represents a DOMException, then throw a TypeError.
  2. -
  3. Return the IDL DOMException value that is a reference to the same object as V.
  4. -
-

- The result of converting - an IDL DOMException value to an ECMAScript - value is the Object value that represents a reference to the same object that the - IDL DOMException represents. -

-
- -
-

4.2.30 Buffer source types

- -

- Values of the IDL buffer source types - are represented by objects of the corresponding ECMAScript class. -

-

- An ECMAScript value V is - converted - to an IDL ArrayBuffer value - by running the following algorithm: -

-
    -
  1. If Type(V) is not Object, - or V does not have an [[ArrayBufferData]] internal slot, - or IsDetachedBuffer(V) is true, - then throw a TypeError. -
  2. -
  3. Return the IDL ArrayBuffer value that is a reference to the same object as V.
  4. -
-

- An ECMAScript value V is - converted - to an IDL DataView value - by running the following algorithm: -

-
    -
  1. If Type(V) is not Object, - or V does not have a [[DataView]] internal slot, - then throw a TypeError.
  2. -
  3. Return the IDL DataView value that is a reference to the same object as V.
  4. -
-

- An ECMAScript value V is - converted - to an IDL Int8Array, - Int16Array, - Int32Array, - Uint8Array, - Uint16Array, - Uint32Array, - Uint8ClampedArray, - Float32Array or - Float64Array value - by running the following algorithm: -

-
    -
  1. Let T be the IDL type V is being converted to.
  2. -
  3. If Type(V) is not Object, - or V does not have a [[TypedArrayName]] internal slot - with a value equal to the name of T, - then throw a TypeError.
  4. -
  5. Return the IDL value of type T that is a reference to the same object as V.
  6. -
-

- The result of converting - an IDL value of any buffer source type - to an ECMAScript value is the Object value that represents - a reference to the same object that the IDL value represents. -

-

- When getting a reference to - or getting a copy of the bytes held by a buffer source - that is an ECMAScript ArrayBuffer, DataView - or typed array object, these steps MUST be followed: -

-
    -
  1. Let O be the ECMAScript object that is the buffer source.
  2. -
  3. Initialize arrayBuffer to O.
  4. -
  5. Initialize offset to 0.
  6. -
  7. Initialize length to 0.
  8. -
  9. If O has a [[ViewedArrayBuffer]] internal slot, then: -
      -
    1. Set arrayBuffer to the value of O’s [[ViewedArrayBuffer]] internal slot.
    2. -
    3. If arrayBuffer is undefined, then - throw a TypeError.
    4. -
    5. Set offset to the value of O’s [[ByteOffset]] internal slot.
    6. -
    7. Set length to the value of O’s [[ByteLength]] internal slot.
    8. -
    -
  10. -
  11. Otherwise, set length to the value of O’s [[ArrayBufferByteLength]] internal slot.
  12. -
  13. If IsDetachedBuffer(O), then - throw a TypeError.
  14. -
  15. Let data be the value of O’s [[ArrayBufferData]] internal slot.
  16. -
  17. Return a reference to or copy of (as required) the length bytes in data - starting at byte offset offset.
  18. -
-

- To detach an ArrayBuffer, - these steps MUST be followed: -

-
    -
  1. Let O be the ECMAScript object that is the ArrayBuffer.
  2. -
  3. DetachArrayBuffer(O).
  4. -
-
- - -
- -
-

4.3 ECMAScript-specific extended attributes

- -

- This section defines a number of - extended attributes - whose presence affects only the ECMAScript binding. -

- -
-

4.3.1 [Clamp]

- -

- If the [Clamp] - extended attribute - appears on an operation argument, - writable attribute or - dictionary member - whose type is one of the integer types, - it indicates that when an ECMAScript Number is - converted to the IDL type, out of range values will be clamped to the range - of valid values, rather than using the operators that use a modulo operation - (ToInt32, ToUint32, etc.). -

-

- The [Clamp] - extended attribute MUST - take no arguments. -

-

- The [Clamp] extended attribute - MUST NOT appear on a read only - attribute, or an attribute, operation argument or dictionary member - that is not of an integer type. It also MUST NOT - be used in conjunction with the [EnforceRange] - extended attribute. -

-

- See the rules for converting ECMAScript values to the various IDL integer - types in section 4.2 - for the specific requirements that the use of - [Clamp] entails. -

- -
Example
-

- In the following IDL fragment, - two operations are declared that - take three octet arguments; one uses - the [Clamp] extended attribute - on all three arguments, while the other does not: -

-
IDL
interface GraphicsContext {
-  void setColor(octet red, octet green, octet blue);
-  void setColorClamped([Clamp] octet red, [Clamp] octet green, [Clamp] octet blue);
-};
-

- In an ECMAScript implementation of the IDL, when setColorClamped is called with - Number values that are out of range for an - octet, those values are clamped to the range [0, 255]. -

-
ECMAScript
// Get an instance of GraphicsContext.
-var context = getGraphicsContext();
-
-// Calling the non-[Clamp] version uses ToUint8 to coerce the Numbers to octets.
-// This is equivalent to calling setColor(255, 255, 1).
-context.setColor(-1, 255, 257);
-
-// Call setColorClamped with some out of range values.
-// This is equivalent to calling setColorClamped(0, 255, 255).
-context.setColorClamped(-1, 255, 257);
-
-
- -
-

4.3.2 [Constructor]

- -

- If the [Constructor] - extended attribute - appears on an interface, it indicates that - the interface object for this interface - will have a [[Construct]] internal method, - allowing objects implementing the interface to be constructed. -

-

- If it appears on a dictionary, then it - indicates that the ECMAScript global object will have a property whose - name is the identifier of the dictionary and whose value is a constructor - function that can return an ECMAScript object that represents a dictionary - value of the given type. -

-

- Multiple [Constructor] extended - attributes may appear on a given interface or dictionary. -

-

- The [Constructor] - extended attribute MUST either - take no arguments or - take an argument list. - The bare form, [Constructor], has the same meaning as - using an empty argument list, [Constructor()]. For each - [Constructor] extended attribute - on the interface, there will be a way to construct an object that implements - the interface by passing the specified arguments. -

-

- The prose definition of a constructor MUST - either return an IDL value of a type corresponding to the interface - or dictionary the [Constructor] - extended attribute appears on, or throw an exception. -

-

- If the [Constructor] extended attribute - is specified on an interface, then the [NoInterfaceObject] - extended attribute MUST NOT also be specified on that interface. -

-

- The [Constructor] extended attribute - MUST NOT be used on a callback interface. -

-

- See section 4.5.1.1 - for details on how a constructor - for an interface is to be implemented, and - section 4.5.3 - for how a constructor for a dictionary is to be implemented. -

- -
Example
-

- The following IDL defines two interfaces. The second has the - [Constructor] extended - attribute, while the first does not. -

-
IDL
interface NodeList {
-  Node item(unsigned long index);
-  readonly attribute unsigned long length;
-};
-
-[Constructor,
- Constructor(double radius)]
-interface Circle {
-  attribute double r;
-  attribute double cx;
-  attribute double cy;
-  readonly attribute double circumference;
-};
-

- An ECMAScript implementation supporting these interfaces would - have a [[Construct]] property on the - Circle interface object which would - return a new object that implements the interface. It would take - either zero or one argument. The - NodeList interface object would not - have a [[Construct]] property. -

-
ECMAScript
var x = new Circle();      // This uses the zero-argument constructor to create a
-                           // reference to a platform object that implements the
-                           // Circle interface.
-
-var y = new Circle(1.25);  // This also creates a Circle object, this time using
-                           // the one-argument constructor.
-
-var z = new NodeList();    // This would throw a TypeError, since no
-                           // [Constructor] is declared.
-
- -
Example
-

- The following IDL defines a dictionary type with a constructor: -

-
IDL
[Constructor(unsigned long patties, unsigned long cheeseSlices)]
-dictionary BurgerOrder {
-  unsigned long pattyCount;
-  unsigned long cheeseSliceCount;
-};
-

- The constructor is defined with the following prose: -

-
-

When the BurgerOrder constructor - is invoked, it must return a dictionary value of type - BurgerOrder whose - pattyCount and cheeseSliceCount members are set to the - values of the patties and cheeseSlices arguments, respectively.

-
-

- An ECMAScript implementation supporting this dictionary type - would have a constructor function on the global object that - returns a plain object with properties corresponding to - the dictionary’s members: -

-
ECMAScript
typeof BurgerOrder;                                // Evaluates to "function".
-
-var order = new BurgerOrder(1, 2);                 // Creates a new object.
-
-order.hasOwnProperty("pattyCount");                // Evaluates to true.
-order.pattyCount;                                  // Evaluates to 1.
-
-Object.getPrototypeOf(order) == Object.prototype;  // Evaluates to true.
-
-
- -
-

4.3.3 [EnforceRange]

- -

- If the [EnforceRange] - extended attribute - appears on an operation argument, - writable regular attribute or - dictionary member - whose type is one of the integer types, - it indicates that when an ECMAScript Number is - converted to the IDL type, out of range values will cause an exception to - be thrown, rather than being converted to a valid value using the operators that use a modulo operation - (ToInt32, ToUint32, etc.). The Number - will be rounded towards zero before being checked against its range. -

-

- The [EnforceRange] - extended attribute MUST - take no arguments. -

-

- The [EnforceRange] extended attribute - MUST NOT appear on a read only - attribute, a static attribute, - or an attribute, operation argument or dictionary member - that is not of an integer type. It also MUST NOT - be used in conjunction with the [Clamp] - extended attribute. -

-

- See the rules for converting ECMAScript values to the various IDL integer - types in section 4.2 - for the specific requirements that the use of - [EnforceRange] entails. -

- -
Example
-

- In the following IDL fragment, - two operations are declared that - take three octet arguments; one uses - the [EnforceRange] extended attribute - on all three arguments, while the other does not: -

-
IDL
interface GraphicsContext {
-  void setColor(octet red, octet green, octet blue);
-  void setColorEnforcedRange([EnforceRange] octet red, [EnforceRange] octet green, [EnforceRange] octet blue);
-};
-

- In an ECMAScript implementation of the IDL, a call to setColorEnforcedRange with - Number values that are out of range for an - octet will result in an exception being - thrown. -

-
ECMAScript
// Get an instance of GraphicsContext.
-var context = getGraphicsContext();
-
-// Calling the non-[EnforceRange] version uses ToUint8 to coerce the Numbers to octets.
-// This is equivalent to calling setColor(255, 255, 1).
-context.setColor(-1, 255, 257);
-
-// When setColorEnforcedRange is called, Numbers are rounded towards zero.
-// This is equivalent to calling setColor(0, 255, 255).
-context.setColorEnforcedRange(-0.9, 255, 255.2);
-
-// The following will cause a TypeError to be thrown, since even after
-// rounding the first and third argument values are out of range.
-context.setColorEnforcedRange(-1, 255, 256);
-
-
- -
-

4.3.4 [Exposed]

-

- If the [Exposed] - extended attribute - appears on an interface, - partial interface, - an individual interface member, or - dictionary with a constructor, - it indicates that the interface, interface member or dictionary constructor is exposed - on a particular set of global interfaces, rather than the default of - being exposed only on the primary global interface. -

-

- The [Exposed] - extended attribute - MUST either - take an identifier or - take an identifier list. - Each of the identifiers mentioned MUST be - a global name. -

-

- Every construct that the [Exposed] - extended attribute - can be specified on has an exposure set, - which is a set of interfaces - defining which global environments the construct can be used in. - The exposure set - for a given construct is defined as follows: -

- -

- If [Exposed] appears on an - overloaded operation, - then it MUST appear identically on all overloads. -

-

- The [Exposed] extended attribute - MUST NOT be specified on both an interface - member and a partial interface definition the interface member is declared on. -

-

- If [Exposed] appears on both an interface - and one of its interface members, then the interface member's - exposure set - MUST be a subset of the interface's - exposure set. -

-

- An interface's exposure set - MUST be a subset of the - exposure set of all - of the interface's consequential - interfaces. -

-

- If an interface X - inherits from another interface - Y then the - exposure set of - X MUST be a subset of the - exposure set of - Y. -

-

- The [Exposed] extended attribute - MUST NOT be specified on a dictionary - that does not also have a [Constructor] extended attribute. -

-

- An interface, - interface member or - dictionary - is exposed in a given ECMAScript global environment if - the ECMAScript global object implements an interface that is in the - interface, interface member or dictionary's - exposure set. -

-

- See - section 4.5 , - section 4.5.3 , - section 4.5.6 , - section 4.5.7 , - section 4.5.8 and - section 4.5.9 - for the specific requirements that the use of - [Exposed] entails. -

-
Example
-

[Exposed] - is intended to be used to control whether interfaces or individual interface - members are available for use only in workers, only in the Window, - or in both.

-

The following IDL fragment shows how that might be achieved:

-
IDL
[PrimaryGlobal]
-interface Window {
-  ...
-};
-
-// By using the same identifier Worker for both SharedWorkerGlobalScope
-// and DedicatedWorkerGlobalScope, both can be addressed in an [Exposed]
-// extended attribute at once.
-[Global=Worker]
-interface SharedWorkerGlobalScope : WorkerGlobalScope {
-  ...
-};
-
-[Global=Worker]
-interface DedicatedWorkerGlobalScope : WorkerGlobalScope {
-  ...
-};
-
-// MathUtils is available for use in workers and on the main thread.
-[Exposed=(Window,Worker)]
-interface MathUtils {
-  static double someComplicatedFunction(double x, double y);
-};
-
-// WorkerUtils is only available in workers.  Evaluating WorkerUtils
-// in the global scope of a worker would give you its interface object, while
-// doing so on the main thread will give you a ReferenceError.
-[Exposed=Worker]
-interface WorkerUtils {
-  static void setPriority(double x);
-};
-
-// Node is only available on the main thread.  Evaluating Node
-// in the global scope of a worker would give you a ReferenceError.
-interface Node {
-  ...
-};
-
-
- - - -
-

4.3.5 [Global] and [PrimaryGlobal]

- -

- If the [Global] - or [PrimaryGlobal] - extended attribute - appears on an interface, - it indicates that objects implementing this interface can - be used as the global object in an ECMAScript environment, - and that the structure of the prototype chain and how - properties corresponding to interface members - will be reflected on the prototype objects will be different from other - interfaces. Specifically: -

-
    -
  1. Any named properties - will be exposed on an object in the prototype chain – the - named properties object – - rather than on the object itself.
  2. -
  3. Interface members from the - interface (or - consequential interfaces) - will correspond to properties on the object itself rather than on - interface prototype objects.
  4. -
-
Note
-

- Placing named properties on an object in the prototype chain - is done so that variable declarations and bareword assignments - will shadow the named property with a property on the global - object itself. -

-

- Placing properties corresponding to interface members on - the object itself will mean that common feature detection - methods like the following will work: -

-
ECMAScript
var indexedDB = window.indexedDB || window.webkitIndexedDB ||
-                window.mozIndexedDB || window.msIndexedDB;
-
-var requestAnimationFrame = window.requestAnimationFrame ||
-                            window.mozRequestAnimationFrame || ...;
-

- If these properties were instead placed on a prototype object, then because of the way variable declarations are handled in - ECMAScript, the code above would result in window.indexedDB - and window.requestAnimationFrame evaluating - to undefined, as the shadowing variable - property would already have been created before the - assignment is evaluated. -

-
-

- If the [Global] or - [PrimaryGlobal] - extended attribute - is used on an interface, then: -

- -

- If [Global] or [PrimaryGlobal] is specified on - a partial interface - definition, then that partial interface definition MUST - be the part of the interface definition that defines - the named property getter. -

-

- The [Global] and [PrimaryGlobal] - extended attributes MUST NOT - be used on an interface that can have more - than one object implementing it in the same ECMAScript global environment. -

-
Note
-

This is because the named properties object, - which exposes the named properties, is in the prototype chain, and it would not make - sense for more than one object’s named properties to be exposed on an object that - all of those objects inherit from.

-
-

- If an interface is declared with the [Global] or - [PrimaryGlobal] - extended attribute, then - there MUST NOT be more than one - interface member across - the interface and its consequential interfaces - with the same identifier. - There also MUST NOT be more than - one stringifier, - or more than one serializer - across those interfaces. -

-
Note
-

This is because all of the members of the interface and its consequential - interfaces get flattened down on to the object that implements the interface.

-
-

- The [Global] and - [PrimaryGlobal] extended attributes - can also be used to give a name to one or more global interfaces, - which can then be referenced by the [Exposed] - extended attribute. -

-

- The [Global] and - [PrimaryGlobal] - extended attributes MUST either - take no arguments - or take an identifier list. -

-

- If the [Global] or - [PrimaryGlobal] - extended attribute - is declared with an identifier list argument, then those identifiers are the interface’s - global names; otherwise, the interface has - a single global name, which is the interface's identifier. -

-
Note
-

The identifier argument list exists so that more than one global interface can - be addressed with a single name in an [Exposed] - extended attribute.

-
-

- The [Global] and - [PrimaryGlobal] - extended attributes - MUST NOT be declared on the same - interface. The [PrimaryGlobal] - extended attribute MUST be declared on - at most one interface. The interface [PrimaryGlobal] - is declared on, if any, is known as the primary global interface. -

-

- See section 4.5.5 , - section 4.7.3 and - section 4.7.7 - for the specific requirements that the use of - [Global] and [PrimaryGlobal] - entails for named properties, - and section 4.5.6 , - section 4.5.7 and - section 4.5.8 - for the requirements relating to the location of properties - corresponding to interface members. -

-
Example
-

- The [PrimaryGlobal] - extended attribute is intended - to be used by the Window interface as defined in - HTML5 ([HTML5], section 5.2). ([Global] - is intended to be used by worker global interfaces.) - The Window interface exposes frames as properties on the Window - object. Since the Window object also serves as the - ECMAScript global object, variable declarations or assignments to the named properties - will result in them being replaced by the new value. Variable declarations for - attributes will not create a property that replaces the existing one. -

-
IDL
[PrimaryGlobal]
-interface Window {
-  getter any (DOMString name);
-  attribute DOMString name;
-  // ...
-};
-

- The following HTML document illustrates how the named properties on the - Window object can be shadowed, and how - the property for an attribute will not be replaced when declaring - a variable of the same name: -

-
HTML
<!DOCTYPE html>
-<title>Variable declarations and assignments on Window</title>
-<iframe name=abc></iframe>
-<!-- Shadowing named properties -->
-<script>
-  window.abc;    // Evaluates to the iframe's Window object.
-  abc = 1;       // Shadows the named property.
-  window.abc;    // Evaluates to 1.
-</script>
-
-<!-- Preserving properties for IDL attributes -->
-<script>
-  Window.prototype.def = 2;         // Places a property on the prototype.
-  window.hasOwnProperty("length");  // Evaluates to true.
-  length;                           // Evaluates to 1.
-  def;                              // Evaluates to 2.
-</script>
-<script>
-  var length;                       // Variable declaration leaves existing property.
-  length;                           // Evaluates to 1.
-  var def;                          // Variable declaration creates shadowing property.
-  def;                              // Evaluates to undefined.
-</script>
-
-
- - -
-

4.3.6 [LenientThis]

-

- If the [LenientThis] - extended attribute - appears on a regular attribute, - it indicates that invocations of the attribute’s getter or setter - with a this value that is not an - object that implements the interface - on which the attribute appears will be ignored. -

-

- The [LenientThis] extended attribute - MUST - take no arguments. - It MUST NOT be used on a - static attribute. -

-
Warning
-

- Specifications SHOULD NOT use [LenientThis] - unless required for compatibility reasons. Specification authors who - wish to use this feature are strongly advised to discuss this on the - public-script-coord@w3.org - mailing list before proceeding. -

-
-

- See the Attributes section for how - [LenientThis] - is to be implemented. -

-
Example
-

- The following IDL fragment defines an interface that uses the - [LenientThis] extended - attribute. -

-
IDL
interface Example {
-  [LenientThis] attribute DOMString x;
-  attribute DOMString y;
-};
-

- An ECMAScript implementation that supports this interface will - allow the getter and setter of the accessor property that corresponds - to x to be invoked with something other than an Example - object. -

-
ECMAScript
var example = getExample();  // Get an instance of Example.
-var obj = { };
-
-// Fine.
-example.x;
-
-// Ignored, since the this value is not an Example object and [LenientThis] is used.
-Object.getOwnPropertyDescriptor(Example.prototype, "x").get.call(obj);
-
-// Also ignored, since Example.prototype is not an Example object and [LenientThis] is used.
-Example.prototype.x;
-
-// Throws a TypeError, since Example.prototype is not an Example object.
-Example.prototype.y;
-
-
- -
-

4.3.7 [NamedConstructor]

-

- If the [NamedConstructor] - extended attribute - appears on an interface, - it indicates that the ECMAScript global object will have a property with the - specified name whose value is a constructor function that can - create objects that implement the interface. - Multiple [NamedConstructor] extended - attributes may appear on a given interface. -

-

- The [NamedConstructor] - extended attribute MUST either - take an identifier or - take a named argument list. - The first form, [NamedConstructor=identifier], has the same meaning as - using an empty argument list, [NamedConstructor=identifier()]. For each - [NamedConstructor] extended attribute - on the interface, there will be a way to construct an object that implements - the interface by passing the specified arguments to the constructor function - that is the value of the aforementioned property. -

-

- The identifier used for the named constructor MUST NOT - be the same as that used by a [NamedConstructor] - extended attribute on another interface, MUST NOT - be the same as an identifier of an interface - that has an interface object, - and MUST NOT be one of the - reserved identifiers. -

-

- The [NamedConstructor] extended attribute - MUST NOT be used on a callback interface. -

-

- See section 4.5.2 - for details on how named constructors - are to be implemented. -

- -
Example
-

- The following IDL defines an interface that uses the - [NamedConstructor] extended - attribute. -

-
IDL
[NamedConstructor=Audio,
- NamedConstructor=Audio(DOMString src)]
-interface HTMLAudioElement : HTMLMediaElement {
-  // ...
-};
-

- An ECMAScript implementation that supports this interface will - allow the construction of HTMLAudioElement - objects using the Audio constructor. -

-
ECMAScript
typeof Audio;                   // Evaluates to 'function'.
-
-var a1 = new Audio();           // Creates a new object that implements
-                                // HTMLAudioElement, using the zero-argument
-                                // constructor.
-
-var a2 = new Audio('a.flac');   // Creates an HTMLAudioElement using the
-                                // one-argument constructor.
-
-
- -
-

4.3.8 [NewObject]

- -

- If the [NewObject] - extended attribute - appears on a regular - or static - operation, - then it indicates that when calling the operation, - a reference to a newly created object - MUST always be returned. -

-

- The [NewObject] - extended attribute MUST - take no arguments. -

-

- The [NewObject] - extended attribute MUST NOT - be used on anything other than a regular - or static - operation - whose return type - is an interface type or - a promise type. -

-
Example
-

- As an example, this extended attribute is suitable for use on - the createElement - operation on the Document - interface ([DOM], section 6.5), - since a new object should always be returned when - it is called. -

-
IDL
interface Document : Node {
-  [NewObject] Element createElement(DOMString localName);
-  ...
-};
-
-
- -
-

4.3.9 [NoInterfaceObject]

- -

- If the [NoInterfaceObject] - extended attribute - appears on an interface, - it indicates that an - interface object - will not exist for the interface in the ECMAScript binding. -

-
Warning
-

- The [NoInterfaceObject] extended attribute - SHOULD NOT be used on interfaces that are not - solely used as supplemental interfaces, - unless there are clear Web compatibility reasons for doing so. Specification authors who - wish to use this feature are strongly advised to discuss this on the - public-script-coord@w3.org - mailing list before proceeding. -

-
-

- The [NoInterfaceObject] extended attribute - MUST take no arguments. -

-

- If the [NoInterfaceObject] extended attribute - is specified on an interface, then the [Constructor] - extended attribute MUST NOT also be specified on that interface. - A [NamedConstructor] extended attribute is fine, - however. -

-

- The [NoInterfaceObject] extended attribute - MUST NOT be specified on an interface that has any - static operations defined on it. -

-

- The [NoInterfaceObject] extended attribute - MUST NOT be specified on a callback interface - unless it has a constant declared on it. - This is because callback interfaces without constants never have - interface objects. -

-

- An interface that does not have the [NoInterfaceObject] extended - attribute specified MUST NOT inherit - from an interface that has the [NoInterfaceObject] extended - attribute specified. -

-

- See section 4.5 - for the specific requirements that the use of - [NoInterfaceObject] entails. -

-
Example
-

- The following IDL - fragment defines two interfaces, one whose interface object - is exposed on the ECMAScript global object, and one whose interface object isn’t: -

-
IDL
interface Storage {
-  void addEntry(unsigned long key, any value);
-};
-
-[NoInterfaceObject]
-interface Query {
-  any lookupEntry(unsigned long key);
-};
-

- An ECMAScript implementation of the above IDL would allow - manipulation of Storage’s - prototype, but not Query’s. -

-
ECMAScript
typeof Storage;                        // evaluates to "function"
-
-// Add some tracing alert() call to Storage.addEntry.
-var fn = Storage.prototype.addEntry;
-Storage.prototype.addEntry = function(key, value) {
-  alert('Calling addEntry()');
-  return fn.call(this, key, value);
-};
-
-typeof Query;                          // evaluates to "undefined"
-var fn = Query.prototype.lookupEntry;  // exception, Query isn’t defined
-
-
-
- -
-

4.3.10 [OverrideBuiltins]

- -

- If the [OverrideBuiltins] - extended attribute - appears on an interface, - it indicates that for a platform object implementing the interface, - properties corresponding to all of - the object’s supported property names - will appear to be on the object, - regardless of what other properties exist on the object or its - prototype chain. This means that named properties will always shadow - any properties that would otherwise appear on the object. - This is in contrast to the usual behavior, which is for named properties - to be exposed only if there is no property with the - same name on the object itself or somewhere on its prototype chain. -

-

- The [OverrideBuiltins] - extended attribute MUST - take no arguments - and MUST NOT appear on an interface - that does not define a named property getter - or that also is declared with the [Global] - or [PrimaryGlobal] - extended attribute. If the extended attribute is specified on - a partial interface - definition, then that partial interface definition MUST - be the part of the interface definition that defines - the named property getter. -

-

- See section 4.7.1 - and section 4.7.7 - for the specific requirements that the use of - [OverrideBuiltins] entails. -

-
Example
-

- The following IDL fragment - defines two interfaces, - one that has a named property getter - and one that does not. -

-
IDL
interface StringMap {
-  readonly attribute unsigned long length;
-  getter DOMString lookup(DOMString key);
-};
-
-[OverrideBuiltins]
-interface StringMap2 {
-  readonly attribute unsigned long length;
-  getter DOMString lookup(DOMString key);
-};
-

- In an ECMAScript implementation of these two interfaces, - getting certain properties on objects implementing - the interfaces will result in different values: -

-
ECMAScript
// Obtain an instance of StringMap.  Assume that it has "abc", "length" and
-// "toString" as supported property names.
-var map1 = getStringMap();
-
-// This invokes the named property getter.
-map1.abc;
-
-// This fetches the "length" property on the object that corresponds to the
-// length attribute.
-map1.length;
-
-// This fetches the "toString" property from the object's prototype chain.
-map1.toString;
-
-
-// Obtain an instance of StringMap2.  Assume that it also has "abc", "length"
-// and "toString" as supported property names.
-var map2 = getStringMap2();
-
-// This invokes the named property getter.
-map2.abc;
-
-// This also invokes the named property getter, despite the fact that the "length"
-// property on the object corresponds to the length attribute.
-map2.length;
-
-// This too invokes the named property getter, despite the fact that "toString" is
-// a property in map2's prototype chain.
-map2.toString;
-
-
- - - -
-

4.3.11 [PutForwards]

- -

- If the [PutForwards] - extended attribute - appears on a read only - regular attribute declaration whose type is - an interface type, - it indicates that assigning to the attribute will have specific behavior. - Namely, the assignment is “forwarded” to the attribute (specified by - the extended attribute argument) on the object that is currently - referenced by the attribute being assigned to. -

-

- The [PutForwards] extended - attribute MUST take an identifier. - Assuming that: -

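- A is the attribute on which the [PutForwards] extended attribute appears, - I is the interface on which A is declared, - J is the interface type that A is declared to be of, and - N is the identifier argument of the extended attribute, -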

- then there MUST be another - attribute B - declared on J whose identifier - is N. Assignment of a value to the attribute A - on an object implementing I will result in that value - being assigned to attribute B of the object that A - references, instead. -

-

- Note that [PutForwards]-annotated - attributes can be - chained. That is, an attribute with the [PutForwards] - extended attribute - can refer to an attribute that itself has that extended attribute. - There MUST NOT exist a cycle in a - chain of forwarded assignments. A cycle exists if, when following - the chain of forwarded assignments, a particular attribute on - an interface is - encountered more than once. -

-

- An attribute with the [PutForwards] - extended attribute MUST NOT also be declared - with the [Replaceable] - extended attribute. -

-

- The [PutForwards] - extended attribute MUST NOT be used - on an attribute that - is not read only. -

-

- The [PutForwards] extended attribute - MUST NOT be used on a - static attribute. -

-

- The [PutForwards] extended attribute - MUST NOT be used on an attribute declared on - a callback interface. -

-

- See the Attributes section for how - [PutForwards] - is to be implemented. -

-
Example
-

- The following IDL fragment defines interfaces for names and people. - The [PutForwards] extended - attribute is used on the name attribute - of the Person interface to indicate - that assignments to that attribute result in assignments to the - full attribute of the - Person object: -

-
IDL
interface Name {
-  attribute DOMString full;
-  attribute DOMString family;
-  attribute DOMString given;
-};
-
-interface Person {
-  [PutForwards=full] readonly attribute Name name;
-  attribute unsigned short age;
-};
-

- In the ECMAScript binding, this would allow assignments to the - “name” property: -

-
ECMAScript
var p = getPerson();           // Obtain an instance of Person.
-
-p.name = 'John Citizen';       // This statement...
-p.name.full = 'John Citizen';  // ...has the same behavior as this one.
-
-
- -
-

4.3.12 [Replaceable]

- -

- If the [Replaceable] - extended attribute - appears on a read only - regular attribute, - it indicates that setting the corresponding property on the - platform object will result in - an own property with the same name being created on the object - which has the value being assigned. This property will shadow - the accessor property corresponding to the attribute, which - exists on the interface prototype object. -

-

- The [Replaceable] - extended attribute MUST - take no arguments. -

-

- An attribute with the [Replaceable] - extended attribute MUST NOT also be declared - with the [PutForwards] - extended attribute. -

-

- The [Replaceable] - extended attribute MUST NOT be used - on an attribute that - is not read only. -

-

- The [Replaceable] extended attribute - MUST NOT be used on a - static attribute. -

-

- The [Replaceable] extended attribute - MUST NOT be used on an attribute declared on - a callback interface. -

-

- See section 4.5.7 - for the specific requirements that the use of - [Replaceable] entails. -

-
Example
-

- The following IDL fragment - defines an interface - with an operation - that increments a counter, and an attribute - that exposes the counter’s value, which is initially 0: -

-
IDL
interface Counter {
-  [Replaceable] readonly attribute unsigned long value;
-  void increment();
-};
-

- Assigning to the “value” property - on a platform object implementing Counter - will shadow the property that corresponds to the - attribute: -

-
ECMAScript
var counter = getCounter();                              // Obtain an instance of Counter.
-counter.value;                                           // Evaluates to 0.
-
-counter.hasOwnProperty("value");                         // Evaluates to false.
-Object.getPrototypeOf(counter).hasOwnProperty("value");  // Evaluates to true.
-
-counter.increment();
-counter.increment();
-counter.value;                                           // Evaluates to 2.
-
-counter.value = 'a';                                     // Shadows the property with one that is unrelated
-                                                         // to Counter::value.
-
-counter.hasOwnProperty("value");                         // Evaluates to true.
-
-counter.increment();
-counter.value;                                           // Evaluates to 'a'.
-
-delete counter.value;                                    // Reveals the original property.
-counter.value;                                           // Evaluates to 3.
-
-
- -
-

4.3.13 [SameObject]

- -

- If the [SameObject] - extended attribute - appears on a read only - attribute, then it - indicates that when getting the value of the attribute on a given - object, the same value MUST always - be returned. -

-

- The [SameObject] - extended attribute MUST - take no arguments. -

-

- The [SameObject] - extended attribute MUST NOT - be used on anything other than a read only - attribute - whose type is an interface type - or object. -

-
Example
-

- As an example, this extended attribute is suitable for use on - the implementation - attribute on the Document - interface ([DOM], section 6.5), - since the same object is always returned for a given - Document object. -

-
IDL
interface Document : Node {
-  [SameObject] readonly attribute DOMImplementation implementation;
-  ...
-};
-
-
- -
-

4.3.14 [TreatNonObjectAsNull]

- -

- If the [TreatNonObjectAsNull] - extended attribute - appears on a callback function, - then it indicates that any value assigned to an attribute - whose type is a nullable - callback function - that is not an object will be converted to - the null value. -

-
Warning
-

- Specifications SHOULD NOT use [TreatNonObjectAsNull] - unless required to specify the behavior of legacy APIs or for consistency with these - APIs. Specification authors who - wish to use this feature are strongly advised to discuss this on the - public-script-coord@w3.org - mailing list before proceeding. At the time of writing, the only known - valid use of [TreatNonObjectAsNull] - is for the callback functions used as the type - of event handler IDL attributes - ([HTML5], section 6.1.6.1) - such as onclick and onerror. -

-
-

- See section 4.2.24 - for the specific requirements that the use of - [TreatNonObjectAsNull] entails. -

-
Example
-

- The following IDL fragment defines an interface that has one - attribute whose type is a [TreatNonObjectAsNull]-annotated - callback function and another whose type is a - callback function without the extended attribute: -

-
IDL
callback OccurrenceHandler = void (DOMString details);
-
-[TreatNonObjectAsNull]
-callback ErrorHandler = void (DOMString details);
-
-interface Manager {
-  attribute OccurrenceHandler? handler1;
-  attribute ErrorHandler? handler2;
-};
-

- In an ECMAScript implementation, assigning a value that is not - an object (such as a Number value) - to handler1 will have different behavior from that when assigning - to handler2: -

-
ECMAScript
var manager = getManager();  // Get an instance of Manager.
-
-manager.handler1 = function() { };
-manager.handler1;            // Evaluates to the function.
-
-try {
-  manager.handler1 = 123;    // Throws a TypeError.
-} catch (e) {
-}
-
-manager.handler2 = function() { };
-manager.handler2;            // Evaluates to the function.
-
-manager.handler2 = 123;
-manager.handler2;            // Evaluates to null.
-
-
- -
-

4.3.15 [TreatNullAs]

- -

- If the [TreatNullAs] - extended attribute - appears on an attribute - or operation argument whose type is - DOMString, - it indicates that a null value - assigned to the attribute or passed as the operation argument will be - handled differently from its default handling. Instead of being stringified - to “null”, which is the default, - it will be converted to the empty string “”. -

-

- If [TreatNullAs] is specified on - an operation itself, and that operation is on a callback interface, - then it indicates that a user object implementing the interface will have the return - value of the function that implements the operation handled in the same way as for operation arguments - and attributes, as above. -

-

- The [TreatNullAs] - extended attribute MUST take the identifier - EmptyString. -

-

- The [TreatNullAs] extended attribute - MUST NOT be specified on an operation argument, - attribute or operation return value whose type is not DOMString. -

-
Note
-

This means that even an attribute of type DOMString? must not - use [TreatNullAs], since null - is a valid value of that type.

-
-

- The [TreatNullAs] extended attribute - also MUST NOT be specified on an operation on - a non-callback interface. -

-

- See section 4.2.16 - for the specific requirements that the use of - [TreatNullAs] entails. -

-
Example
-

- The following IDL fragment defines an interface that has one - attribute with the [TreatNullAs] - extended attribute, and one operation with an argument that has - the extended attribute: -

-
IDL
interface Dog {
-  attribute DOMString name;
-  [TreatNullAs=EmptyString] attribute DOMString owner;
-
-  boolean isMemberOfBreed([TreatNullAs=EmptyString] DOMString breedName);
-};
-

- An ECMAScript implementation implementing the Dog - interface would convert a null value - assigned to the “owner” property or passed as the - argument to the isMemberOfBreed function - to the empty string rather than "null": -

-
ECMAScript
var d = getDog();         // Assume d is a platform object implementing the Dog
-                          // interface.
-
-d.name = null;            // This assigns the string "null" to the .name
-                          // property.
-
-d.owner = null;           // This assigns the string "" to the .owner property.
-
-d.isMemberOfBreed(null);  // This passes the string "" to the isMemberOfBreed
-                          // function.
-
-
- -
-

4.3.16 [Unforgeable]

- -

- If the [Unforgeable] - extended attribute - appears on a non-static - attribute - or non-static - operation, it indicates - that the attribute or operation will be reflected as an ECMAScript property in - a way that means its behavior cannot be modified and that performing - a property lookup on the object will always result in the attribute’s - property value being returned. In particular, the property will be - non-configurable and will exist as an own property on the object - itself rather than on its prototype. -

-

- If the [Unforgeable] - extended attribute - appears on an interface, - it indicates that all of the non-static - attributes - and non-static - operations declared on - that interface and its consequential interfaces - will be similarly reflected as own ECMAScript properties on objects - that implement the interface, rather than on the prototype. -

-

- An attribute or operation is said to be unforgeable - on a given interface A if any of the following are true: -

- -

- The [Unforgeable] - extended attribute MUST - take no arguments. -

-

- The [Unforgeable] - extended attribute MUST NOT appear on - anything other than an attribute, - non-static operation - or an interface. If it does - appear on an operation, then - it MUST appear on all operations with - the same identifier on that interface. -

-

- If an attribute or operation X is unforgeable - on an interface A, and A is one of the - inherited interfaces - of another interface B, then B and all of its - consequential interfaces - MUST NOT have a non-static attribute or - regular operation with the same - identifier as X. -

-
Note
-

For example, the following is disallowed:

-
IDL
interface A1 {
-  [Unforgeable] readonly attribute DOMString x;
-};
-interface B1 : A1 {
-  void x();  // Invalid; would be shadowed by A1's x.
-};
-
-interface B2 : A1 { };
-B2 implements Mixin;
-interface Mixin {
-  void x();  // Invalid; B2's copy of x would be shadowed by A1's x.
-};
-
-[Unforgeable]
-interface A2 {
-  readonly attribute DOMString x;
-};
-interface B3 : A2 {
-  void x();  // Invalid; would be shadowed by A2's x.
-};
-
-interface B4 : A2 { };
-B4 implements Mixin;
-interface Mixin {
-  void x();  // Invalid; B4's copy of x would be shadowed by A2's x.
-};
-
-interface A3 { };
-A3 implements A2;
-interface B5 : A3 {
-  void x();  // Invalid; would be shadowed by A3's mixed-in copy of A2's x.
-};
-
-

- See section 4.5.7, - section 4.5.8, - section 4.7, - section 4.7.1 and - section 4.7.7 - for the specific requirements that the use of - [Unforgeable] entails. -

-
Example
-

- The following IDL fragment defines - an interface that has two attributes, - one of which is designated as [Unforgeable]: -

-
IDL
interface System {
-  [Unforgeable] readonly attribute DOMString username;
-  readonly attribute long long loginTime;
-};
-

- In an ECMAScript implementation of the interface, the username attribute will be exposed as a non-configurable property on the - object itself: -

-
ECMAScript
var system = getSystem();                      // Get an instance of System.
-
-system.hasOwnProperty("username");             // Evaluates to true.
-system.hasOwnProperty("loginTime");            // Evaluates to false.
-System.prototype.hasOwnProperty("username");   // Evaluates to false.
-System.prototype.hasOwnProperty("loginTime");  // Evaluates to true.
-
-try {
-  // This call would fail, since the property is non-configurable.
-  Object.defineProperty(system, "username", { value: "administrator" });
-} catch (e) { }
-
-// This defineProperty call would succeed, because System.prototype.loginTime
-// is configurable.
-var forgedLoginTime = 5;
-Object.defineProperty(System.prototype, "loginTime", { value: forgedLoginTime });
-
-system.loginTime;  // So this now evaluates to forgedLoginTime.
-
-
- - -
- -
-

4.4 Security

- -

- Certain algorithms in the sections below are defined to - perform a security check on a given - object. This check is used to determine whether a given - operation invocation or - attribute access should be - allowed. The input to the security check is the - platform object on - which the operation invocation or attribute access is being done, - and the ECMAScript global environment associated with the - Function object that implements the - operation or attribute. -

-
Note
-

The expectation is that the HTML specification defines how a - security check is performed, and that it will either throw an - appropriate exception or return normally. [HTML]

-
-
- -
-

4.5 Interfaces

- -

- For every interface that - is exposed in a given - ECMAScript global environment and: -

- -

- a corresponding property MUST exist on the - ECMAScript environment's global object. - The name of the property is the identifier of the interface, - and its value is an object called the interface object. -

-

- The property has the attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true }. - The characteristics of an interface object are described in section 4.5.1. -

- -

- In addition, for every [NamedConstructor] - extended attribute on an exposed interface, a corresponding property MUST - exist on the ECMAScript global object. The name of the property is the - identifier that occurs directly after the - “=”, and its value is an object called a - named constructor, which allows - construction of objects that implement the interface. The property has the - attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true }. - The characteristics of a named constructor are described in - section 4.5.2 - . -

- -
-

4.5.1 Interface object

- -

- The interface object for a given non-callback interface - is a function object. - It has properties that correspond to - the constants and - static operations - defined on that interface, as described in sections - 4.5.6 Constants and - 4.5.8 Operations. -

-

- The [[Prototype]] internal property of - an interface object for a non-callback interface is determined as - follows: -

-
    -
  1. - If the interface inherits from some other interface, the value - of [[Prototype]] is the interface - object for that other interface. -
  2. -
  3. - If the interface doesn't inherit from any other interface, - the value of [[Prototype]] is - %FunctionPrototype% ([ECMA-262], - section 6.1.7.4). -
  4. -
-

- An interface object for a non-callback interface MUST have a property named “prototype” - with attributes - { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: false } - whose value is an object called the interface prototype object. This object has properties - that correspond to the regular attributes and - regular operations defined on the interface, - and is described in more detail in - section 4.5.4 - . -

-
Note
-

Since an interface object for a non-callback interface is a function object, the typeof operator will return - "function" when applied to - such an interface object.

-
-

- The internal [[Prototype]] property - of an interface object for a callback interface MUST be - the Object.prototype object. -

-
Note
-

Remember that interface objects for callback interfaces only exist if they have - constants declared on them; - when they do exist, they are not function objects.

-
- -
-
4.5.1.1 Interface object [[Call]] method
- -

- If the interface is declared with a - [Constructor] extended attribute, - then the interface object - can be called as a function to create an object that implements that - interface. Interfaces that do not have a constructor will throw - an exception when called as a function. -

- - -

- In order to define how overloaded constructor invocations are resolved, the - overload resolution algorithm - is defined. Its input is an effective overload set, - S, and a list of ECMAScript values, $arg_{0..n-1}$. - Its output is a pair consisting of the operation or - extended attribute of one of S’s entries - and a list of IDL values or the special value “missing”. The algorithm behaves as follows: -

-
    -
  1. Let maxarg be the length of the longest type list of the entries in S.
  2. -
  3. Initialize argcount to be min(maxarg, n).
  4. - -
  5. Remove from S all entries whose type list is not of length argcount.
  6. - -
  7. If S is empty, then throw a TypeError.
  8. - -
  9. Initialize d to −1.
  10. - -
  11. - Initialize method to - undefined. -
  12. - -
  13. If there is more than one entry in S, then set - d to be the distinguishing argument index - for the entries of S.
  14. - -
  15. Initialize values to be an empty list, where each entry will be either an IDL value or the special value “missing”.
  16. - -
  17. Initialize i to 0.
  18. - -
  19. While i < d: -
      -
    1. Let V be $arg_i$.
    2. -
    3. Let type be the type at index i in the type list of any entry in S. -
      Note

      All entries in S at this point have the same type and optionality value at index i.

      -
    4. -
    5. Let optionality be the value at index i in the list of optionality values of any entry in S.
    6. -
    7. If optionality is “optional” and V is undefined, then: -
        -
      1. If the argument at index i is declared with a default value, - then append to values that default value.
      2. -
      3. Otherwise, append to values the special value “missing”.
      4. -
      -
    8. -
    9. Otherwise, append to values the result of converting - V to IDL type type.
    10. -
    11. Set i to i + 1.
    12. -
    -
  20. - -
  21. If i = d, then: -
      -
    1. Let V be $arg_i$. -
      Note

      This is the argument that will be used to resolve which overload is selected.

    2. - -
    3. If V is undefined, and there is an entry in S - whose list of optionality values has “optional” at index i, - then remove from S all other entries.
    4. - -
    5. Otherwise: if V is null or undefined, - and there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    6. - -
    7. - Otherwise: if V is a platform object – but not a - platform array object – and - there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    8. - - - - - -
    9. - Otherwise: if V is a DOMException platform object and - there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    10. - -
    11. - Otherwise: if V is an Error object (that is, it has an [[ErrorData]] internal slot) and - there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    12. - -
    13. - Otherwise: if V is an object with an [[ArrayBufferData]] internal slot and - there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    14. - -
    15. - Otherwise: if V is an object with a [[DataView]] internal slot and - there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    16. - -
    17. - Otherwise: if V is an object with a [[TypedArrayName]] internal slot and - there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    18. - -
    19. - Otherwise: if IsCallable(V) is true, - and there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    20. - -
    21. - Otherwise: if V is an object, and - there is an entry in S that has one of the - following types at position i of its type list, - - and after performing the following steps, -
        -
      1. - Let method be the result of - GetMethod(V, @@iterator). -
      2. -
      3. - ReturnIfAbrupt(method). -
      4. -
      - method is not undefined, then remove from S all - other entries. -
    22. - -
    23. - Otherwise: if V is an object, and - there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    24. - -
    25. - Otherwise: if V is a Boolean value, - and there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    26. - -
    27. - Otherwise: if V is a Number value, - and there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    28. - -
    29. - Otherwise: if there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    30. - -
    31. - Otherwise: if there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    32. - -
    33. - Otherwise: if there is an entry in S that has one of the following types at position i of its type list, - - then remove from S all other entries. -
    34. - -
    35. - Otherwise: if there is an entry in S that has any at position i - of its type list, - then remove from S all other entries. -
    36. - -
    37. - Otherwise: - throw a TypeError. -
    38. -
    -
  22. - -
  23. Let callable be the operation or extended attribute - of the single entry in S.
  24. - -
  25. - If i = d and method is not undefined, then -
      -
    1. - Let V be argi. -
    2. -
    3. - Let T be the type at index i in the - type list of the remaining entry in S. -
    4. -
    5. - If T is a sequence type, then - append to values the result of - creating a sequence - of type T from - V and method. -
    6. - -
    7. - Set i to i + 1. -
    8. -
    -
  26. - -
  27. - While i < argcount: -
      -
    1. Let V be argi.
    2. -
    3. Let type be the type at index i in the type list of the remaining entry in S.
    4. -
    5. Let optionality be the value at index i in the list of optionality values of the remaining entry in S.
    6. -
    7. If optionality is “optional” and V is undefined, then: -
        -
      1. If the argument at index i is declared with a default value, - then append to values that default value.
      2. -
      3. Otherwise, append to values the special value “missing”.
      4. -
      -
    8. -
    9. Otherwise, append to values the result of - converting V to IDL type type.
    10. -
    11. Set i to i + 1.
    12. -
    -
  28. - -
  29. While i is less than the number of arguments callable is declared to take: -
      -
    1. If callable’s argument at index i is declared with a default value, - then append to values that default value.
    2. -
    3. Otherwise, if callable’s argument at index i is not variadic, then append to values the special value “missing”.
    4. -
    5. Set i to i + 1.
    6. -
    -
  30. - -
  31. Return the pair <callable, values>.
  32. -
-
Note
-

- The overload resolution algorithm performs both the identification - of which overloaded operation, constructor, etc. is being called, - and the conversion of the ECMAScript argument values to their - corresponding IDL values. Informally, it operates as follows. -

-

First, the selection of valid overloads is done by considering - the number of ECMAScript arguments that were passed in to the function:

-
    -
  • If there are more arguments passed in than the longest - overload argument list, then they are ignored.
  • -
  • After ignoring these trailing arguments, only overloads - that can take this exact number of arguments are considered. - If there are none, then a TypeError is thrown.
  • -
-

Once we have a set of possible overloads with the right number - of arguments, the ECMAScript values are converted from left to right. - The nature of the restrictions on overloading means that if we - have multiple possible overloads at this point, then there will - be one position in the argument list that will be used to - distinguish which overload we will finally select; this is - the distinguishing - argument index.

-

We first convert the arguments to the left of the distinguishing - argument. (There is a requirement that an argument to the left of - the distinguishing argument index has the same type as in the other - overloads, at the same index.) Then we inspect the type of the - ECMAScript value that is passed in at the distinguishing argument - index to determine which IDL type it may correspond to. - This allows us to select the final overload that will - be invoked. If the value passed in is undefined - and there is an overload with an optional argument at this position, then - we will choose that overload. If there is no valid overload for the type of - value passed in here, then we throw a TypeError. - The inspection of the value at the distinguishing argument index does not have any side effects; - the only side effects that come from running the overload resolution - algorithm are those that come from converting the ECMAScript values - to IDL values.

-

At this point, we have determined which overload to use. We now - convert the remaining arguments, from the distinguishing argument onwards, - again ignoring any additional arguments that were ignored due to being passed - after the last possible argument.

-

When converting an optional argument’s ECMAScript value to its equivalent IDL value, - undefined will be converted into - the optional argument’s default value, - if it has one, or a special value “missing” otherwise.

-

Optional arguments corresponding to a final, variadic argument do not treat - undefined as a special “missing” value, however. - The undefined value is converted to the type - of the variadic argument as would be done for a non-optional argument.

-
-

- The internal [[Call]] method - of the interface object behaves as follows, assuming - arg0..n−1 is the list - of argument values passed to the constructor, and I - is the interface: -

-
    -
  1. - If I was not declared with a [Constructor] - extended attribute, then - throw a TypeError. -
  2. -
  3. - Let id be the identifier of interface I. -
  4. -
  5. - Initialize S to the - effective overload set - for constructors with identifier - id on interface - I and with argument count n. -
  6. -
  7. - Let <constructor, values> be the result of passing S and - arg0..n−1 to the - overload resolution algorithm. -
  8. -
  9. - Let R be the result of performing the actions listed in the description of - constructor with values as the argument values. -
  10. -
  11. - Return the result of converting - R to an ECMAScript interface type value - I. -
  12. -
-

- If the internal [[Call]] method - of the interface object - returns normally, then it MUST - return an object that implements interface I. - This object also MUST be - associated with the ECMAScript global environment associated - with the interface object. -

-

- Interface objects for non-callback interfaces MUST have a property named “length” - with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } - whose value is a Number. - If the [Constructor] - extended attribute - does not appear on the interface definition, then the value is 0. - Otherwise, the value is determined as follows: -

-
    -
  1. - Let id be the identifier of interface I. -
  2. -
  3. - Initialize S to the - effective overload set - for constructors with - identifier - id on interface - I and with argument count 0. -
  4. -
  5. - Return the length of the shortest argument list of the entries in S. -
  6. -
-

- All interface objects MUST have a - property named “name” with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } - whose value is the identifier of the corresponding interface. -

-
- -
-
4.5.1.2 Interface object [[HasInstance]] method
- -

- The internal [[HasInstance]] method of every - interface object - A MUST behave as follows, - assuming V is the object - argument passed to [[HasInstance]]: -

-
    -
  1. If V is not an object, return false.
  2. -
  3. Let O be the result of calling the [[Get]] method of A with property name “prototype”.
  4. -
  5. If O is not an object, throw a TypeError exception.
  6. -
  7. If V is a platform object that implements the - interface for which O is the interface prototype object, - return true.
  8. -
  9. Repeat: -
      -
    1. Set V to the value of the [[Prototype]] internal property of V.
    2. -
    3. If V is null, return false.
    4. -
    5. If O and V refer to the same object, - return true.
    6. -
    -
  10. -
-
-
- -
-

4.5.2 Named constructors

- -

- A named constructor - that exists due to one or more - [NamedConstructor] - extended attributes - with a given identifier - is a function object. - It MUST have a [[Call]] - internal property, which allows construction of objects that - implement the interface on which the - [NamedConstructor] - extended attributes appear. It behaves as follows, assuming - arg0..n−1 is the list - of argument values passed to the constructor, id - is the identifier of the constructor specified in the - extended attribute named argument list, - and I is the interface - on which the [NamedConstructor] - extended attribute appears: -

-
    -
  1. - Initialize S to the - effective overload set - for constructors with identifier - id on interface - I and with argument count n. -
  2. -
  3. - Let <constructor, values> be the result of passing S and - arg0..n−1 to the - overload resolution algorithm. -
  4. -
  5. - Let R be the result of performing the actions listed in the description of - constructor with values as the argument values. -
  6. -
  7. - Return the result of converting - R to an ECMAScript - interface type value - I. -
  8. -
-

- If the internal [[Call]] method - of the named constructor - returns normally, then it MUST - return an object that implements interface I. - This object also MUST be - associated with the ECMAScript global environment associated - with the named constructor. -

-

- A named constructor MUST have a property named “length” - with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } - whose value is a Number determined as follows: -

-
    -
  1. - Initialize S to the - effective overload set - for constructors with - identifier - id on interface - I and with argument count 0. -
  2. -
  3. - Return the length of the shortest argument list of the entries in S. -
  4. -
-

- A named constructor MUST have a property named “name” - with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } - whose value is the identifier used for the named constructor. -

-

- A named constructor MUST also have a property named - “prototype” with attributes - { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: false } - whose value is the interface prototype object - for the interface on which the - [NamedConstructor] - extended attribute - appears. -

-
- -
-

4.5.3 Dictionary constructors

- -

- For every dictionary type - that has one or more [Constructor] - extended attributes - and which is exposed in a given - ECMAScript global environment, a corresponding property MUST exist on the - ECMAScript environment's global object. The name of the property is the - identifier of the dictionary, - and its value is a function object - called the dictionary constructor. -

-

- The property has the attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true }. -

-

- The internal [[Call]] method of the dictionary - constructor behaves as follows, assuming - arg0..n−1 is the list - of argument values passed to the constructor, and D - is the dictionary type: -

-
    -
  1. - Let id be the identifier of dictionary type D. -
  2. -
  3. - Initialize S to the - effective overload set - for constructors with identifier - id on dictionary type - D and with argument count n. -
  4. -
  5. - Let <constructor, values> be the result of passing S and - arg0..n−1 to the - overload resolution algorithm. -
  6. -
  7. - Let R be the result of performing the actions listed in the description of - constructor with values as the argument values. -
  8. -
  9. - Return the result of converting - R, which is a dictionary value of type D, to an ECMAScript value. -
  10. -
-

- If the internal [[Call]] method - of the dictionary constructor - returns normally, then it MUST - return an object that is - associated with the ECMAScript global environment associated - with the dictionary constructor. -

-

- A dictionary constructor object MUST have a property named “length” - with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } - whose value is a Number determined as follows: -

-
    -
  1. - Let id be the identifier of the dictionary type. -
  2. -
  3. - Initialize S to the - effective overload set - for constructors with - identifier - id on dictionary D and with argument count 0. -
  4. -
  5. - Return the length of the shortest argument list of the entries in S. -
  6. -
-

- A dictionary constructor object MUST have a property named “name” - with attributes { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: true } - whose value is the identifier of the dictionary. -

-
- -
-

4.5.4 Interface prototype object

- -

- There MUST exist an - interface prototype - object for every non-callback interface - defined, regardless of whether the interface was declared with the - [NoInterfaceObject] - extended attribute. - The interface prototype object for a particular interface has - properties that correspond to the regular attributes - and regular operations - defined on that interface. These properties are described in more detail in - sections 4.5.7 Attributes and - 4.5.8 Operations. -

-

- As with the interface object, - the interface prototype object also has properties that correspond to the - constants defined on that - interface, described in section - 4.5.6. -

-

- If the [NoInterfaceObject] - extended attribute was not specified on the interface, then - the interface prototype object MUST - also have a property named “constructor” with attributes - { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true } whose value - is a reference to the interface object for the interface. -

- -

- The interface prototype object - for a given interface A MUST have an internal - [[Prototype]] property whose value is returned from - the following steps: -

-
    -
  1. If A is declared with the [Global] - or [PrimaryGlobal] - extended attribute, and A - supports named properties, then - return the named properties object - for A, as defined in section 4.5.5 - .
  2. -
  3. Otherwise, if A is declared to inherit from another - interface, then return the - interface prototype object - for the inherited interface.
  4. - -
  5. Otherwise, return %ObjectPrototype% ( - [ECMA-262] - , section 6.1.7.4). - ([ECMA-262], section 15.2.4).
  6. -
-
Note
-

- The interface prototype object - of an interface that is defined with - the [NoInterfaceObject] - extended attribute - will be accessible if the interface is used as a - non-supplemental interface. - For example, with the following IDL: -

-
IDL
[NoInterfaceObject]
-interface Foo {
-};
-
-partial interface Window {
-  attribute Foo foo;
-};
-

- it is not possible to access the interface prototype object through - the interface object - (since it does not exist as window.Foo). However, an instance - of Foo can expose the interface prototype - object by getting its internal [[Prototype]] - property value – Object.getPrototypeOf(window.foo) in - this example. -

-

- If the interface is used solely as a - supplemental interface, - then there will be no way to access its interface prototype object, since no - object will have the interface prototype object as its internal - [[Prototype]] property value. In such cases, - it is an acceptable optimization for this object not to exist. -

-
- - -

- The class string of an - interface prototype object - is the concatenation of the interface’s - identifier and the string - “Prototype”. -

-
- -
-

4.5.5 Named properties object

- -

- For every interface declared with the - [Global] or - [PrimaryGlobal] - extended attribute - that supports named properties, - there MUST exist an object known as the - named properties object for that - interface. -

-

- The named properties object - for a given interface A MUST have an internal - [[Prototype]] property whose value is returned from - the following steps: -

-
    -
  1. If A is declared to inherit from another interface, then return the - interface prototype object - for the inherited interface.
  2. - -
  3. Otherwise, return %ObjectPrototype% ( - [ECMA-262] - , section 6.1.7.4).
  4. -
-

- The class string of a - named properties object - is the concatenation of the interface’s - identifier and the string - “Properties”. -

- -
-
4.5.5.1 Named properties object [[GetOwnProperty]] method
- -

- The internal [[GetOwnProperty]] method of every - named properties object - MUST behave as follows when called with object O - and property name P: -

- -
    -
  1. Let A be the interface for the - named properties object O.
  2. -
  3. Let object be the sole object from O’s ECMAScript global environment that implements A. -
    Note
    -

    For example, if the interface is the Window - interface as defined in HTML5 ([HTML5], section 5.2), then the sole object - will be this global environment’s window object.

    -
    -
  4. -
  5. If the result of running the named property visibility algorithm with - property name P and object object is true, then: -
      -
    1. Let operation be the operation used to declare the named property getter.
    2. - -
    3. Let value be an uninitialized variable.
    4. -
    5. If operation was defined without an identifier, then - set value to the result of performing the steps listed in the interface description to - determine the value of a named property - with P as the name.
    6. -
    7. Otherwise, operation was defined with an identifier. Set value to the result - of performing the steps listed in the description of operation with P as the only argument value.
    8. - -
    9. Let desc be a newly created Property Descriptor ( - [ECMA-262] - , section 6.2.4) with no fields.
    10. -
    11. Set desc.[[Value]] to the result of converting - value to an ECMAScript value.
    12. -
    13. If the named property is defined to be unenumerable, - then set desc.[[Enumerable]] to false, - otherwise set it to true.
    14. -
    15. Set desc.[[Writable]] to true and - desc.[[Configurable]] to true.
    16. -
    17. Return desc.
    18. -
    -
  6. - -
  7. Return the result of calling the default [[GetOwnProperty]] internal method ( - [ECMA-262] - , section 9.1.5) on O passing P as the argument.
  8. -
-
- -
-
4.5.5.2 Named properties object [[DefineOwnProperty]] method
- -

- The internal [[DefineOwnProperty]] method of every - named properties object - MUST behave as follows when called with object O - and property name P. The term “Reject” is used as defined in - section . -

- -
    -
  1. Reject.
  2. -
-
- -
-
4.5.5.3 Named properties object [[Delete]] method
- -

- The internal [[Delete]] method of every - named properties object - MUST behave as follows when called with object O and - property name P. -

- -
    -
  1. Return false.
  2. -
-
-
- - - -
-

4.5.6 Constants

- -

- For each exposed - constant defined on - an interface A, there - MUST be a corresponding property. - The property has the following characteristics: -

-
    -
  • The name of the property is the identifier of the constant.
  • -
  • - The location of the property is determined as follows: - -
  • -
  • The value of the property is that which is obtained by converting the constant’s IDL value to an ECMAScript value.
  • -
  • The property has attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }.
  • -
-

- In addition, a property with the same characteristics MUST - exist on the interface object, if - that object exists. -

-
- -
-

4.5.7 Attributes

- -

- For each exposed - attribute of the - interface, whether it - was declared on the interface itself or one of its - consequential interfaces, - there MUST exist a corresponding property. - The characteristics of this property are as follows: -

-
    -
  • - The name of the property is the identifier of the attribute. -
  • -
  • - The location of the property is determined as follows: - -
  • -
  • - The property has attributes { [[Get]]: G, [[Set]]: S, [[Enumerable]]: true, [[Configurable]]: configurable }, - where: - -
  • -
  • - The attribute getter is a Function - object whose behavior when invoked is as follows: -
      -
    1. Let idlValue be an IDL value determined as follows.
    2. -
    3. If the attribute is a regular attribute, then: -
        -
      1. Let I be the interface - whose interface prototype object - this property corresponding to the attribute appears on. -
        Note
        -

        This means that even if an implements statement was used to make - an attribute available on the interface, I is the interface - on the left hand side of the implements statement, and not the one - that the attribute was originally declared on.

        -
        -
      2. -
      3. Let O be the this value.
      4. -
      5. If O is a platform object, - then perform a security check on O - with the ECMAScript global environment associated with this Function that - implements the attribute getter.
      6. -
      7. If O is not a platform object that implements I, then: -
          -
        1. If the attribute was specified with the - [LenientThis] extended attribute, - then return undefined.
        2. -
        3. Otherwise, throw a TypeError.
        4. -
        -
      8. -
      9. - Set idlValue to be the result of performing the actions listed in the description of the attribute that occur when getting - (or those listed in the description of the inherited attribute, if this attribute is declared to - inherit its getter), - with O as the object. -
      10. -
      -
    4. -
    5. Otherwise, the attribute is a static attribute. - Set idlValue to be the result of performing the actions listed in the description of the attribute that occur when getting.
    6. -
    7. - Let V be the result of converting - idlValue to an ECMAScript value. -
    8. -
    9. - Return V. -
    10. -
    - The value of the Function object’s “length” - property is the Number value 0. -
  • -
  • - The attribute setter is undefined - if the attribute is declared readonly and has neither a - [PutForwards] nor a [Replaceable] - extended attribute declared on it. - Otherwise, it is a Function object whose behavior when invoked is as follows: -
      -
    1. If no arguments were passed to the Function, then - throw a TypeError.
    2. -
    3. Let V be the value of the first argument passed to the Function.
    4. -
    5. If the attribute is a regular attribute, then: -
        -
      1. Let I be the interface - whose interface prototype object - this property corresponding to the attribute appears on.
      2. -
      3. Let O be the this value.
      4. -
      5. If O is a platform object, - then perform a security check on O - with the ECMAScript global environment associated with this Function that - implements the attribute setter.
      6. -
      7. Let validThis be true if O is a - platform object that implements I, or - false otherwise.
      8. -
      9. If validThis is false and the - attribute was not specified with the - [LenientThis] extended attribute, - then throw a TypeError.
      10. -
      11. If the attribute is declared with a [Replaceable] - extended attribute, then: -
          -
        1. Let P be the identifier of the attribute.
        2. -
        3. Call the [[DefineOwnProperty]] method of O - passing property name P, Property Descriptor - { [[Value]]: V, [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true }, - and false.
        4. -
        5. Return undefined.
        6. -
        -
      12. -
      13. If validThis is false, then return undefined.
      14. -
      15. If the attribute is declared with a [PutForwards] - extended attribute, then: -
          -
        1. Let Q be the result of calling the [[Get]] method - on O using the identifier of the attribute as the property name.
        2. -
        3. If Q is not an object, then throw a TypeError.
        4. -
        5. Let A be the attribute identified by the [PutForwards] extended attribute.
        6. -
        7. Call the [[Put]] method on Q - using the identifier of A as the property name and V as the value.
        8. -
        9. Return undefined.
        10. -
        -
      16. -
      -
    6. -
    7. Let idlValue be an IDL value determined as follows: -
        -
      • If the type of the attribute is an enumeration, then: -
          -
        1. Let S be the result of calling ToString(V).
        2. -
        3. If S is not one of the enumeration’s values, then return undefined.
        4. -
        5. The value of idlValue is the enumeration value equal to S.
        6. -
        -
      • -
      • Otherwise, the type of the attribute is not an enumeration. - The value of idlValue is the result of converting - V to an IDL value.
      • -
    8. -
    9. If the attribute is a regular attribute, then perform the actions listed in the description of the attribute that occur when setting, - with O as the object and idlValue as the value.
    10. -
    11. Otherwise, the attribute is a static attribute. - Perform the actions listed in the description of the attribute that occur when setting with idlValue as the value.
    12. -
    13. Return undefined.
    14. -
    - The value of the Function object’s “length” - property is the Number value 1. -
  • -
-
Note
-

- Although there is only a single property for an IDL attribute, since - accessor property getters and setters are passed a this - value for the object on which the property corresponding to the IDL attribute is - accessed, they are able to expose instance-specific data. -

-
-
Note
-

- Note that attempting to assign to a property corresponding to a - read only attribute - results in different behavior depending on whether the script doing so is in strict mode. - When in strict mode, such an assignment will result in a TypeError - being thrown. When not in strict mode, the assignment attempt will be ignored. -

-
-
- -
-

4.5.8 Operations

- -

- For each unique identifier - of an exposed operation - defined on the interface, there - MUST exist a corresponding property, - unless the effective overload set - for that identifier and operation - and with an argument count of 0 has no entries. - The characteristics of this property are as follows: -

-
    -
  • The name of the property is the identifier.
  • -
  • - The location of the property is determined as follows: - -
  • -
  • - The property has attributes - { [[Writable]]: B, [[Enumerable]]: true, [[Configurable]]: B }, - where B is false if the operation is - unforgeable on the interface, - and true otherwise. -
  • -
  • - The value of the property is a Function object whose - behavior is as follows, - assuming id is the - identifier, - arg0..n−1 is the list - of argument values passed to the function: -
      -
    1. - Try running the following steps: -
        -
      1. - Let I be the interface - whose interface prototype object - (or interface object, for a static - operation) this property corresponding to the operation appears on. -
        Note
        -

        This means that even if an implements statement was used to make - an operation available on the interface, I is the interface - on the left hand side of the implements statement, and not the one - that the operation was originally declared on.

        -
        -
      2. -
      3. - Let O be a value determined as follows: -
          -
        • - If the operation is a static operation, then O is null. -
        • - -
        • - Otherwise, if the this value is not null, - then O is the this value. -
        • -
        • - Otherwise, throw a TypeError. -
        • -
        -
      4. -
      5. If O is a platform object, - then perform a security check on O - with the ECMAScript global environment associated with this Function that - implements the operation.
      6. -
      7. - If O is not null and is also not a platform object - that implements interface I, throw a TypeError. -
      8. -
      9. - Initialize S to the - effective overload set - for regular operations - (if the operation is a regular operation) or for - static operations - (if the operation is a static operation) with - identifier - id on interface - I and with argument count n. -
      10. -
      11. - Let <operation, values> be the result of passing S and - arg0..n−1 to the - overload resolution algorithm. -
      12. -
      13. - Let R be the result of performing (on O, if the operation - is not a static operation) the actions listed in the description of - operation with values as the argument values. -
      14. -
      15. - Return the result of converting - R to an ECMAScript value of - the type operation is declared to return. -
      16. -
      - And then, if an exception was thrown: -
        -
      1. If the operation has a return type - that is a promise type, then: -
          -
        1. Let reject be the initial value of %Promise%.reject.
        2. -
        3. Return the result of calling reject with %Promise% as the - this object and the exception as the single - argument value.
        4. -
        -
      2. -
      3. Otherwise, end these steps and allow the exception to propagate.
      4. -
      -
    2. -
    -
  • -
  • - The value of the Function object’s “length” - property is a Number determined as follows: -
      -
    1. - Let S be the - effective overload set - for regular operations - (if the operation is a regular operation) or for - static operations - (if the operation is a static operation) with - identifier - id on interface - I and with argument count 0. -
    2. -
    3. - Return the length of the shortest argument list of the entries in S. -
    4. -
    -
  • -
- -
-
4.5.8.1 Stringifiers
- -

- If the interface - has an exposed - stringifier, then - there MUST exist a property with - the following characteristics: -

-
    -
  • The name of the property is “toString”.
  • -
  • If the stringifier is - unforgeable on the interface - or if the interface was declared with the [Global] or [PrimaryGlobal] extended attribute, - then the property exists on every object that implements the interface. - Otherwise, the property exists on the interface prototype object.
  • -
  • The property has attributes { [[Writable]]: B, [[Enumerable]]: true, [[Configurable]]: B }, - where B is false if the stringifier is - unforgeable on the interface, - and true otherwise.
  • -
  • -

    The value of the property is a Function object, which behaves as follows:

    -
      -
    1. Let O be the result of calling ToObject on the this value.
    2. -
    3. If O is a platform object, - then perform a security check on O - with the ECMAScript global environment associated with this Function that - implements the stringifier.
    4. -
    5. If O is not an object that implements the interface - on which the stringifier was declared, then throw a TypeError.
    6. -
    7. Let V be an uninitialized variable.
    8. -
    9. Depending on where stringifier was specified: -
      -
      on an attribute
      -
      Set V to the result of performing the actions listed in the description of the attribute that occur when getting - (or those listed in the description of the inherited attribute, if this attribute is declared to - inherit its getter), - with O as the object.
      -
      on an operation with an identifier
      -
      Set V to the result of performing the actions listed in the description - of the operation, using O as the this value - and passing no arguments.
      -
      on an operation with no identifier
      -
      Set V to the result of performing the stringification behavior - of the interface.
      -
      -
    10. -
    11. Return the result of converting V to a String value.
    12. -
    -

    The value of the Function object’s “length” - property is the Number value 0.

    -
  • -
-
- -
-
4.5.8.2 Serializers
- -

- If the interface - has an exposed - serializer, then - a property MUST exist - whose name is “toJSON”, - with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } - and whose value is a - Function object. -

-

- The location of the property is determined as follows: -

- -

- The property’s Function object, when invoked, - MUST behave as follows: -

-
    -
  1. Let O be the result of calling ToObject on the this value.
  2. -
  3. If O is a platform object, - then perform a security check on O - with the ECMAScript global environment associated with this Function that - implements the serializer.
  4. -
  5. If O is not an object that implements the interface - on which the serializer was declared, then throw a TypeError.
  6. -
  7. Depending on how serializer was specified: -
    -
    on an operation with an identifier
    -
    -
      -
    1. Return the result of performing the actions listed in the description of the - operation, using O as the this value - and passing no arguments.
    2. -
    -
    -
    as a keyword, either with or without a serialization pattern
    -
    -
      -
    1. Let S be the serialized value that is the result of invoking the serialization behavior of the - interface for object O.
    2. -
    3. Return the result of converting - S to an ECMAScript value.
    4. -
    -
    -
    -
  8. -
-

- The following steps define how to convert a serialized value to an ECMAScript value: -

-
    -
  1. Let S be the serialized value.
  2. -
  3. Depending on the type of S: -
    -
    a map
    -
    -
      -
    1. Let O be a new object created as if by the expression ({}).
    2. -
    3. For each entry in S, in the order they were added to the map: -
        -
      1. Let V be the result of converting - the value of the entry to an ECMAScript value.
      2. -
      3. Let P be the entry’s key.
      4. -
      5. Call the [[DefineOwnProperty]] internal method of O passing - property name P, Property Descriptor { [[Value]]: V, - [[Writable]]: true, [[Enumerable]]: true, - [[Configurable]]: true }, and false - as arguments.
      6. -
      -
    4. -
    5. Return O.
    6. -
    -
    -
    a list
    -
    -
      -
    1. Let A be a new Array object created as if by the expression [].
    2. -
    3. Let index be 0.
    4. -
    5. While index is less than the number of elements in S: -
        -
      1. Let V be the result of converting - the value of the element in S at index index to an ECMAScript value.
      2. -
      3. Let P be ToString(index).
      4. -
      5. Call the [[DefineOwnProperty]] internal method of A passing - property name P, Property Descriptor { [[Value]]: V, - [[Writable]]: true, [[Enumerable]]: true, - [[Configurable]]: true }, and false - as arguments.
      6. -
      -
    6. -
    7. Return A.
    8. -
    -
    -
    any other serialized value
    -
    -
      -
    1. Let V be the result of converting - S to an ECMAScript value.
    2. -
    3. Return V.
    4. -
    -
    -
    -
  4. -
-
-
- -
-

4.5.9 Common iterator behavior

- -
-
4.5.9.1 @@iterator
- -

- If the interface - has any of the following: -

- -

- then a property MUST exist - whose name is the @@iterator symbol, - with attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true } - and whose value is a function object. -

-

- The location of the property is determined as follows: -

- -

- If the interface has an iterable declaration, - then the Function, when invoked, - MUST behave as follows: -

-
    -
  1. Let object be the result of calling ToObject on the this value.
  2. -
  3. If object is a platform object, - then perform a security check on object - with the ECMAScript global environment associated with this Function.
  4. -
  5. Let interface be the interface - the iterable declaration is on.
  6. -
  7. If object is not a platform object - that implements interface, - then throw a TypeError.
  8. -
  9. Let iterator be a newly created default iterator object - for interface with object as its target and iterator kind “value”.
  10. -
  11. Return iterator.
  12. -
-

- If the interface does not have an iterable declaration - but does define an indexed property getter, - then the Function object is %ArrayProto_values% ( - [ECMA-262] - , section 6.1.7.4). -

- - -

- The value of the @@iterator Function object’s “length” - property is the Number value 0. -

-
- -
-
4.5.9.2 forEach
- -

- If the interface - has any of the following: -

- -

- then a property named “forEach” MUST exist - with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } - and whose value is a function object. -

-

- The location of the property is determined as follows: -

- -

- If the interface has an iterable declaration, - then the Function MUST - have the same behavior as one that would exist assuming the interface had - this operation instead of the - iterable declaration: -

-
IDL
void forEach(Function callback, optional any thisArg = undefined);
-

- with the following prose definition: -

-
    -
  1. Let values be the list of values to iterate over.
  2. -
  3. Let len be the length of values.
  4. -
  5. Initialize k to 0.
  6. -
  7. While k < len: -
      -
    1. Let kValue be the value in values at index k.
    2. -
    3. Invoke callback with thisArg - as the callback this value and - k and kValue as its arguments.
    4. -
    -
  8. -
- - -

- The value of the Function object’s “length” - property is the Number value 1. -

-
-
- -
-

4.5.10 Iterable declarations

- -
-
4.5.10.1 entries
- -

- If the interface has an - iterable declaration, - then a property named “entries” MUST exist - with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } - and whose value is a function object. -

-

- The location of the property is determined as follows: -

- -

- The Function, when invoked, MUST behave as follows: -

-
    -
  1. Let object be the result of calling ToObject on the this value.
  2. -
  3. If object is a platform object, - then perform a security check on object - with the ECMAScript global environment associated with this Function.
  4. -
  5. Let interface be the interface - on which the iterable declaration is declared.
  6. -
  7. If object is not a platform object - that implements interface, - then throw a TypeError.
  8. -
  9. Let iterator be a newly created default iterator object - for interface with object as its target and iterator kind “key+value”.
  10. -
  11. Return iterator.
  12. -
-

The value of the Function object’s “length” property is the Number value 0.

-
- -
-
4.5.10.2 keys
- -

- If the interface has an - iterable declaration, - then a property named “keys” MUST exist - with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } - and whose value is a function object. -

-

- The location of the property is determined as follows: -

- -

- The Function, when invoked, MUST behave as follows: -

-
    -
  1. Let object be the result of calling ToObject on the this value.
  2. -
  3. If object is a platform object, - then perform a security check on object - with the ECMAScript global environment associated with this Function.
  4. -
  5. Let interface be the interface - on which the iterable declaration is declared.
  6. -
  7. If object is not a platform object - that implements interface, - then throw a TypeError.
  8. -
  9. Let iterator be a newly created default iterator object - for interface with object as its target and iterator kind “key”.
  10. -
  11. Return iterator.
  12. -
-

The value of the Function object’s “length” property is the Number value 0.

-
- -
-
4.5.10.3 values
- -

- If the interface has an - iterable declaration, - then a property named “values” MUST exist - with attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } - and whose value is the function object - that is the value of the @@iterator property. -

-

- The location of the property is determined as follows: -

- -

The value of the Function object’s “length” property is the Number value 0.

-
- -
-
4.5.10.4 Default iterator objects
- -

- A default iterator object for a given - interface, target and iteration kind - is an object whose internal [[Prototype]] property is the - iterator prototype object - for the interface. -

-

- A default iterator object - has three internal values: -

-
    -
  1. its target, which is an object whose values are to be iterated,
  2. -
  3. its kind, which is the iteration kind,
  4. -
  5. its index, which is the current index into the values to be iterated.
  6. -
-

- When a default iterator object is first created, - its index is set to 0. -

-

- The class string of a - default iterator object - for a given interface - is the result of concatenating the identifier - of the interface and - the string “ Iterator”. -

-
- -
-
4.5.10.5 Iterator prototype object
- -

- The iterator prototype object - for a given interface - is an object that exists for every interface that has an - iterable declaration. It serves as the - prototype for default iterator objects - for the interface. -

-

- The internal [[Prototype]] property of an iterator prototype object - MUST be %IteratorPrototype% ( - [ECMA-262] - , section 6.1.7.4). -

-

- An iterator prototype object - MUST have a property named “next” with - attributes { [[Writable]]: true, [[Enumerable]]: true, [[Configurable]]: true } - and whose value is a function object - that behaves as follows: -

-
    -
  1. Let interface be the interface for which the - iterator prototype object exists.
  2. -
  3. Let object be the result of calling ToObject on the this value.
  4. -
  5. If object is a platform object, - then perform a security check on object - with the ECMAScript global environment associated with this Function that - implements the iterator’s next method.
  6. -
  7. If object is not a default iterator object for interface, - then throw a TypeError.
  8. -
  9. Let target be object’s target.
  10. -
  11. Let index be object’s index.
  12. -
  13. Let kind be object’s kind.
  14. -
  15. Let values be the list of values to iterate over. -
    Note
    -

    Depending on whether prose accompanying the interface defined this to be a snapshot at the time - iteration begins, the list of values might be different from the previous time the next - method was called on this iterator object.

    -
    -
  16. -
  17. Let len be the length of values.
  18. -
  19. If object’s index is greater than or equal to len, then - return CreateIterResultObject(undefined, true).
  20. -
  21. Let result be a value determined by the value of kind: -
    -
    key
    -
    -
      -
    1. Let key be the ECMAScript Number value index.
    2. -
    3. result is key.
    4. -
    -
    -
    value
    -
    -
      -
    1. Let idlValue be the value in values at index index.
    2. -
    3. Let value be the result of converting idlValue to an ECMAScript value.
    4. -
    5. result is value.
    6. -
    -
    -
    key+value
    -
    -
      -
    1. Let key be the ECMAScript Number value index.
    2. -
    3. Let idlValue be the value in values at index index.
    4. -
    5. Let value be the result of converting idlValue to an ECMAScript value.
    6. -
    7. Let array be the result of performing ArrayCreate(2).
    8. -
    9. Call CreateDataProperty(array, "0", key).
    10. -
    11. Call CreateDataProperty(array, "1", value).
    12. -
    13. result is array.
    14. -
    -
    -
    -
  22. -
  23. Return CreateIterResultObject(result, false).
  24. -
-

- The class string of an - iterator prototype object - for a given interface - is the result of concatenating the identifier - of the interface and - the string “Iterator”. -

-
-
- - - - - -
-

4.5.11 Initializing objects from iterables

- -

- Some objects, which are attempting to emulate map- and set-like interfaces, will want to accept iterables - as constructor parameters and initialize themselves in this way. Here we provide some algorithms that can - be invoked in order to do so in the same way as in the ECMAScript spec, so that those objects behave - the same as the built-in Map and Set objects. -

- -

- To add map elements from an iterable iterable to - an object destination with adder method name adder, perform the following steps: -

-
    -
  1. If Type(destination) is not Object, then throw a TypeError exception.
  2. -
  3. If iterable is not present, let iterable be undefined.
  4. -
  5. If iterable is either undefined or null, then let iter be undefined.
  6. -
  7. Else, -
      -
    1. Let adder be the result of Get(destination, adder).
    2. -
    3. ReturnIfAbrupt(adder).
    4. -
    5. If IsCallable(adder) is false, throw a TypeError exception.
    6. -
    7. Let iter be the result of GetIterator(iterable).
    8. -
    9. ReturnIfAbrupt(iter).
    10. -
    -
  8. -
  9. If iter is undefined, then return.
  10. -
  11. Repeat -
      -
    1. Let next be the result of IteratorStep(iter).
    2. -
    3. ReturnIfAbrupt(next).
    4. -
    5. If next is false, then return NormalCompletion(destination).
    6. -
    7. Let nextItem be IteratorValue(next).
    8. -
    9. ReturnIfAbrupt(nextItem).
    10. -
    11. If Type(nextItem) is not Object, then throw a TypeError exception.
    12. -
    13. Let k be the result of Get(nextItem, '0').
    14. -
    15. ReturnIfAbrupt(k).
    16. -
    17. Let v be the result of Get(nextItem, '1').
    18. -
    19. ReturnIfAbrupt(v).
    20. -
    21. Let status be the result of calling the [[Call]] internal method of adder with destination as - thisArgument and (k, v) as argumentsList.
    22. -
    23. ReturnIfAbrupt(status).
    24. -
    -
  12. -
-
-
- -
-

4.6 Implements statements

- -

- The interface prototype object - of an interface A MUST have a copy of - each property that corresponds to one of the - constants, - attributes, - operations and - iterable declarations - that exist on all of the interface prototype objects of A’s - consequential interfaces. - For operations, where the property is a data property with a Function - object value, each copy of the property MUST have - distinct Function objects. For attributes, each - copy of the accessor property MUST have - distinct Function objects for their getters, - and similarly with their setters. -

-
Note
-

- When invoking an operation by calling - a Function object that is the value of one of the copies that exists - due to an implements statement, the this value is - checked to ensure that it is an object that implements the - interface corresponding to the - interface prototype object - that the property is on. -

-

- For example, consider the following IDL: -

-
IDL
interface A {
-  void f();
-};
-
-interface B { };
-B implements A;
-
-interface C { };
-C implements A;
-

- Attempting to call B.prototype.f on an object that implements - A (but not B) or one - that implements C will result in a - TypeError being thrown. However, - calling A.prototype.f on an object that implements - B or one that implements C - would succeed. This is handled by the algorithm in section 4.5.8 - that defines how IDL operation invocation works in ECMAScript. -

-

- Similar behavior is required for the getter and setter Function - objects that correspond to IDL attributes, - and this is handled in section 4.5.7. -

-
-
- -
-

4.7 Platform objects implementing interfaces

- -

- Every platform object is associated with a global environment, just - as the initial objects are. - It is the responsibility of specifications using Web IDL to state - which global environment (or, by proxy, which global object) each platform - object is associated with. -

-

- The primary interface of a platform object - that implements one or more interfaces is the most-derived non-supplemental interface - that it implements. The value of the internal [[Prototype]] - property of the platform object is the interface prototype object - of the primary interface - from the platform object’s associated global environment. -

-

- The global environment that a given platform object - is associated with can change after it has been created. When - the global environment associated with a platform object is changed, its internal - [[Prototype]] property MUST be immediately - updated to be the interface prototype object - of the primary interface - from the platform object’s newly associated global environment. -

- - - -

- Every platform object that implements an [Unforgeable]-annotated - interface and which does not have a stringifier - that is unforgeable on any of the - interfaces it implements MUST have a property with the - following characteristics: -

-
    -
  • The name of the property is “toString”.
  • -
  • The property has attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }.
  • -
  • The value of the property is %ObjProto_toString% ( - [ECMA-262] - , section 6.1.7.4), the initial value of Object.prototype.toString.
  • -
- -

- Every platform object that implements an [Unforgeable]-annotated - interface and which does not have a serializer - that is unforgeable on any of the - interfaces it implements MUST have a property with the - following characteristics: -

-
    -
  • The name of the property is “toJSON”.
  • -
  • The property has attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }.
  • -
  • The value of the property is undefined.
  • -
- -

- Every platform object that implements an [Unforgeable]-annotated - interface MUST have a property with the - following characteristics: -

-
    -
  • The name of the property is “valueOf”.
  • -
  • The property has attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }.
  • -
  • - The value of the property is a Function object whose behavior - is as follows: -
      -
    1. Return the this value.
    2. -
    - This Function object is the - default unforgeable valueOf function. - The value of the Function object’s “length” - property is the Number value 0. -
  • -
- -

- The class string of - a platform object that implements one or more interfaces - MUST be the identifier of - the primary interface - of the platform object. -

- -
-

4.7.1 Indexed and named properties

- -

- If a platform object implements an interface that - supports indexed or - named properties, - the object will appear to have additional properties that correspond to the - object’s indexed and named properties. These properties are not “real” own - properties on the object, but are made to look like they are by being exposed - by the [[GetOwnProperty]] internal method. -

-

- However, when the [Global] or - [PrimaryGlobal] - extended attribute has been used, - named properties are not exposed on the object but on another object - in the prototype chain, the named properties object. -

-

- It is permissible for an object to implement multiple interfaces that support indexed properties. - However, if so, and there are conflicting definitions as to the object’s - supported property indices, - or if one of the interfaces is a supplemental interface for the - platform object, then it is undefined what additional properties the object will appear to - have, or what its exact behavior will be with regard to its indexed properties. - The same applies for named properties. -

-

- The indexed property getter - that is defined on the derived-most interface that the - platform object implements is the one that defines the behavior - when indexing the object with an array index. Similarly for - indexed property setters. - This way, the definitions of these special operations from - ancestor interfaces can be overridden. -

- -

- Platform objects implementing an interface that supports indexed or named properties cannot be fixed; if Object.freeze, Object.seal - or Object.preventExtensions is called on one of these objects, the function - MUST throw a TypeError. - Similarly, an interface prototype object - that exposes named properties due to the use of [Global] or - [PrimaryGlobal] - also MUST throw a TypeError - if one of the three functions above is called on it. -

- -

- The name of each property that appears to exist due to an object supporting indexed properties - is an array index property name, which is a property - name P such that Type(P) is String - and for which the following algorithm returns true: -

-
    -
  1. Let i be ToUint32(P).
  2. -
  3. Let s be ToString(i).
  4. -
  5. If s ≠ P or i = 2^32 − 1, then return false.
  6. -
  7. Return true.
  8. -
- - -

- A property name is an unforgeable property name on a - given platform object if the object implements an interface that - has an interface member with that identifier - and that interface member is unforgeable on any of - the interfaces that the object implements. If the object implements an - [Unforgeable]-annotated - interface, then “toString” and “valueOf” are - also unforgeable property names - on that object. -

-

- The named property visibility algorithm is used to determine if - a given named property is exposed on an object. Some named properties are not exposed on an object - depending on whether the [OverrideBuiltins] - extended attribute was used. The algorithm - operates as follows, with property name P and object O: -

- -
    -
  1. If P is an unforgeable property name - on O, then return false.
  2. -
  3. If O implements an interface with - an [Unforgeable]-annotated attribute - whose identifier is P, then return false.
  4. -
  5. If P is not a supported property name - of O, then return false.
  6. -
  7. If O implements an interface that has the [OverrideBuiltins] - extended attribute, then return true.
  8. -
  9. If O has an own property named P, then return false.
  10. -
  11. Initialize prototype to be the value of the internal [[Prototype]] property of O.
  12. -
  13. While prototype is not null: -
      -
    1. If prototype is not a named properties object, - and prototype has an own property named P, then return false.
    2. -
    3. Set prototype to be the value of the internal [[Prototype]] property of prototype.
    4. -
    -
  14. -
  15. Return true.
  16. -
-
Note
-

This should ensure that for objects with named properties, property resolution is done in the following order:

-
    -
  1. Indexed properties.
  2. -
  3. Unforgeable attributes and operations.
  4. -
  5. Then, if [OverrideBuiltins]: -
      -
    1. Named properties.
    2. -
    3. Own properties.
    4. -
    5. Properties from the prototype chain.
    6. -
    -
  6. -
  7. Otherwise, if not [OverrideBuiltins]: -
      -
    1. Own properties.
    2. - -
    3. Properties from the prototype chain.
    4. - -
    5. Named properties.
    6. -
    -
  8. -
-
-

- Support for getters is - handled by the platform object [[GetOwnProperty]] method - defined in section 4.7.3, and - for setters - by the platform object [[DefineOwnProperty]] method - defined in section 4.7.7 and the platform object [[Set]] method - defined in section 4.7.6. -

-
- -
-

4.7.2 The PlatformObjectGetOwnProperty abstract operation

- -

- The PlatformObjectGetOwnProperty - abstract operation performs the following steps when called with an - object O, a property name P, and a boolean - ignoreNamedProps value: -

- -
    -
  1. - If O supports indexed properties - and P is an array index property name, then: -
      -
    1. Let index be the result of calling ToUint32(P).
    2. -
    3. If index is a supported property index, then: -
        -
      1. Let operation be the operation used to declare the indexed property getter.
      2. - -
      3. Let value be an uninitialized variable.
      4. -
      5. If operation was defined without an identifier, then - set value to the result of performing the steps listed in the interface description to - determine the value of an indexed property - with index as the index.
      6. -
      7. Otherwise, operation was defined with an identifier. Set value to the result - of performing the steps listed in the description of operation with index as the only argument value.
      8. - -
      9. Let desc be a newly created Property Descriptor ( - [ECMA-262] - , section 6.2.4) with no fields.
      10. -
      11. Set desc.[[Value]] to the result of converting - value to an ECMAScript value.
      12. -
      13. If O implements an interface with an indexed property setter, then set - desc.[[Writable]] to true, otherwise set it to - false.
      14. -
      15. Set desc.[[Enumerable]] and desc.[[Configurable]] to true.
      16. -
      17. Return desc.
      18. -
      -
    4. -
    5. Set ignoreNamedProps to true.
    6. -
    -
  2. - -
  3. If O supports named properties, O does not - implement an interface with the [Global] or [PrimaryGlobal] - extended attribute, the result of running the named property visibility algorithm with - property name P and object O is true, and ignoreNamedProps is false, then: -
      -
    1. Let operation be the operation used to declare the named property getter.
    2. - -
    3. Let value be an uninitialized variable.
    4. -
    5. If operation was defined without an identifier, then - set value to the result of performing the steps listed in the interface description to - determine the value of a named property - with P as the name.
    6. -
    7. Otherwise, operation was defined with an identifier. Set value to the result - of performing the steps listed in the description of operation with P as the only argument value.
    8. - -
    9. Let desc be a newly created Property Descriptor ( - [ECMA-262] - , section 6.2.4) with no fields.
    10. -
    11. Set desc.[[Value]] to the result of converting - value to an ECMAScript value.
    12. -
    13. If O implements an interface with a named property setter, then set - desc.[[Writable]] to true, otherwise set it to - false.
    14. -
    15. If the named property is defined to be unenumerable, - then set desc.[[Enumerable]] to false, - otherwise set it to true.
    16. -
    17. Set desc.[[Configurable]] to true.
    18. -
    19. Return desc.
    20. -
    -
  4. - -
  5. Return the result of calling the default [[GetOwnProperty]] internal method ( - [ECMA-262] - , section 9.1.5) on O passing P as the argument.
  6. -
-
- -
-

4.7.3 Platform object [[GetOwnProperty]] method

- -

- The internal [[GetOwnProperty]] method of every - platform object O that implements an interface - which supports indexed or - named properties - MUST behave as follows when called with property name P: -

- -
    -
  1. - Return the result of invoking the PlatformObjectGetOwnProperty - abstract operation with - O, P, and false as - arguments. -
  2. -
-
- -
-

4.7.4 Invoking a platform object indexed property setter

-

- To invoke an indexed property - setter with property name P and ECMAScript value - V, the following steps MUST be performed: -

-
    -
  1. Let index be the result of calling ToUint32(P).
  2. -
  3. Let creating be true if index is not a supported property index, and false otherwise.
  4. -
  5. Let operation be the operation used to declare the indexed property setter.
  6. -
  7. Let T be the type of the second argument of operation.
  8. -
  9. Let value be the result of converting V to an IDL value of type T.
  10. -
  11. If operation was defined without an identifier, then: -
      -
    1. If creating is true, then perform the steps listed in the interface description to - set the value of a new indexed property - with index as the index and value as the value.
    2. -
    3. Otherwise, creating is false. Perform the steps listed in the interface description to - set the value of an existing indexed property - with index as the index and value as the value.
    4. -
    -
  12. -
  13. Otherwise, operation was defined with an identifier. Perform the steps listed in the description of - operation with index and value as the two argument values.
  14. -
-
- -
-

4.7.5 Invoking a platform object named property setter

-

- To invoke a named property - setter with property name P and ECMAScript value - V, the following steps MUST be performed: -

-
    -
  1. Let creating be true if P is not a supported property name, and false otherwise.
  2. -
  3. Let operation be the operation used to declare the named property setter.
  4. -
  5. Let T be the type of the second argument of operation.
  6. -
  7. Let value be the result of converting V to an IDL value of type T.
  8. -
  9. If operation was defined without an identifier, then: -
      -
    1. If creating is true, then perform the steps listed in the interface description to - set the value of a new named property - with P as the name and value as the value.
    2. -
    3. Otherwise, creating is false. Perform the steps listed in the interface description to - set the value of an existing named property - with P as the name and value as the value.
    4. -
    -
  10. -
  11. Otherwise, operation was defined with an identifier. Perform the steps listed in the description of - operation with P and value as the two argument values.
  12. -
-
- -
-

4.7.6 Platform object [[Set]] method

- -

- The internal [[Set]] method of every - platform object O that implements an interface - which supports indexed or - named properties - MUST behave as follows when called - with property name P, value V, and - ECMAScript language value Receiver: -

- -
    -
  1. If O and Receiver are the same object, - then: -
      -
    1. - If O supports indexed - properties, P is an array index property - name, and O implements an interface with an indexed - property setter, then: -
        -
      1. - Invoke the indexed - property setter with P and V. -
      2. -
      3. Return true.
      4. -
      -
    2. - -
    3. - If O supports named - properties, Type(P) is String, - P is not an array index property - name, and O implements an interface with a named - property setter, then: -
        -
      1. - Invoke the named - property setter with P and V. -
      2. -
      3. Return true.
      4. -
      -
    4. -
    -
  2. -
  3. - Let ownDesc be the result of invoking the PlatformObjectGetOwnProperty - abstract operation with - O, P, and true as - arguments. -
  4. -
  5. - Perform steps 3-11 of the default [[Set]] internal method ( - [ECMA-262] - , section 9.1.9). -
  6. -
-
- -
-

4.7.7 Platform object [[DefineOwnProperty]] method

- -

- The internal [[DefineOwnProperty]] method of every - platform object O that implements an interface - which supports indexed or - named properties - MUST behave as follows when called with property name P, - Property Descriptor Desc and boolean flag Throw. - The term “Reject” is used as defined in - section . -

- -
    -
  1. - If O supports indexed properties and - P is an array index property name, then: -
      -
    1. If the result of calling IsDataDescriptor(Desc) is false, then Reject.
    2. -
    3. If O does not implement an interface with an indexed property setter, then Reject.
    4. -
    5. Invoke the indexed - property setter with P and Desc.[[Value]].
    6. -
    7. Return true.
    8. -
    -
  2. - -
  3. - If O supports named properties, - O does not implement an interface with the - [Global] or [PrimaryGlobal] extended attribute - and P is not an unforgeable property name - of O, then: -
      -
    1. Let creating be true if P is not a supported property name, and false otherwise.
    2. - -
    3. If O implements an interface with the [OverrideBuiltins] - extended attribute or O does not have an own property - named P, then: -
        -
      1. If creating is false and O does not implement an interface with a named property setter, then Reject.
      2. -
      3. If O implements an interface with a named property setter, then: -
          -
        1. If the result of calling IsDataDescriptor(Desc) is false, then Reject.
        2. -
        3. - Invoke the named - property setter with P and - Desc.[[Value]]. -
        4. -
        5. Return true.
        6. -
        -
      4. -
      -
    4. -
    -
  4. - -
  5. If O does not implement an interface with the - [Global] or [PrimaryGlobal] extended attribute, - then set Desc.[[Configurable]] to true.
  6. -
  7. Call the default [[DefineOwnProperty]] internal method ( - [ECMA-262] - , section 9.1.6) on O passing P, Desc, and Throw as arguments.
  8. -
-
- -
-

4.7.8 Platform object [[Delete]] method

- -

- The internal [[Delete]] method of every - platform object O that implements an interface - which supports indexed or - named properties - MUST behave as follows when called with property name P. -

- -
    -
  1. - If O supports indexed properties and - P is an array index property name, then: -
      -
    1. Let index be the result of calling ToUint32(P).
    2. -
    3. If index is not a supported property index, then return true.
    4. -
    5. Return false.
    6. -
    -
  2. - -
  3. - If O supports named properties, - O does not implement an interface with the - [Global] or [PrimaryGlobal] extended attribute - and the result of calling the named property visibility algorithm - with property name P and object O is true, then: -
      -
    1. If O does not implement an interface with a named property deleter, then return false.
    2. -
    3. Let operation be the operation used to declare the named property deleter.
    4. -
    5. If operation was defined without an identifier, then: -
        -
      1. Perform the steps listed in the interface description to - delete an existing named property - with P as the name.
      2. -
      3. If the steps indicated that the deletion failed, then return false.
      4. -
      -
    6. -
    7. Otherwise, operation was defined with an identifier: -
        -
      1. Perform the steps listed in the description of operation with P as the only argument value.
      2. -
      3. If operation was declared with a return type of boolean - and the steps returned false, then return false.
      4. -
      -
    8. -
    9. Return true.
    10. -
    -
  4. - -
  5. If O has an own property with name P, then: -
      -
    1. If the property is not configurable, then return false.
    2. -
    3. Otherwise, remove the property from O.
    4. -
    -
  6. -
  7. Return true.
  8. -
-
- -
-

4.7.9 Platform object [[Call]] method

- -

- The internal [[Call]] method of every - platform object O that implements an interface - I with at least one legacy caller - MUST behave as follows, assuming - arg0..n−1 is the list of argument - values passed to [[Call]]: -

-
    -
  1. Initialize S to the effective overload set - for legacy callers on I and with argument count n.
  2. -
  3. - Let <operation, values> be the result of passing S and - arg0..n−1 to the - overload resolution algorithm. -
  4. -
  5. Perform the actions listed in the description of the legacy caller operation with - values as the argument values.
  6. -
  7. Return the result of converting - the return value from those actions to an ECMAScript value of the type - operation is declared to return (or undefined - if operation is declared to return void).
  8. -
-
- -
-

4.7.10 Property enumeration

- -

- This document does not define a complete property enumeration order - for all platform objects implementing interfaces - (or for platform objects representing exceptions). - However, if a platform object implements an interface that - supports indexed or - named properties, then - properties on the object MUST be - enumerated in the following order: -

-
    -
  1. If the object supports indexed properties, then - the object’s supported property indices are - enumerated first, in numerical order.
  2. -
  3. If the object supports named properties, then - the object’s supported property names that - are visible according to the named property visibility algorithm - and which are not stated to be unenumerable - are enumerated next, in the order given in the definition of the set of supported property names.
  4. -
  5. Finally, any enumerable own properties or properties from the object’s prototype chain are then enumerated, - in no defined order.
  6. -
-
Note
-

Future versions of the ECMAScript specification may define a total order for property enumeration.

-
-
-
- -
-

4.8 User objects implementing callback interfaces

- -

- As described in section 3.9 , - callback interfaces can be - implemented in script by an ECMAScript object. - The following cases determine whether and how a given object - is considered to be a user object implementing a callback interface: -

-
    -
  • - If the interface is a single operation callback interface, - then any object is considered to implement the interface. - The implementation of the operation (or set of overloaded operations) is - as follows: -
      -
    • If the object is callable, - then the implementation of the operation (or set of overloaded operations) is - the callable object itself.
    • -
    • Otherwise, the object is not callable. - The implementation of the operation (or set of overloaded operations) is - the result of invoking the internal [[Get]] method - on the object with a property name that is the identifier - of the operation.
    • -
    -
  • -
  • - Otherwise, the interface is not a single operation callback interface. Any object is considered to implement the interface. - For each operation declared on the interface with a given identifier, the implementation - is the result of invoking [[Get]] on the object with a - property name that is that identifier. -
  • -
-

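- A single operation callback interface is - a callback interface that is not declared to inherit from another interface, has no attributes, and has one or more regular operations that all have the same identifier, and no others. -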

- -

- A user object’s - operation is called - with a list of IDL argument values idlarg0..n−1 by - following the algorithm below. The callback this value - is the value to use as the this value - when a callable - object was supplied as the implementation of a - single operation callback interface. - By default, undefined is used as the callback this value, - however this MAY be overridden by other - specifications. -

-
    -
  1. Try running the following steps: -
      -
    1. Let V be the IDL callback interface type value - that represents the user object implementing the interface.
    2. -
    3. Let O be the ECMAScript object corresponding to V.
    4. -
    5. Let X be the implementation of the operation. If the interface is a single operation callback interface and IsCallable(O) is true, then X is O. - Otherwise, X is the result of calling - the internal [[Get]] method of O with the identifier of the operation as the property name.
    6. -
    7. If Type(X) is not Object, throw a TypeError exception.
    8. -
    9. If IsCallable(X) is false, then throw a TypeError exception.
    10. -
    11. Let this be O if the interface is not a single operation callback interface - or if IsCallable(O) is false, - and the callback this value otherwise.
    12. -
    13. - Let arg0..n−1 be a list of - ECMAScript values, where argi is the result - of converting - idlargi to an ECMAScript value. -
    14. -
    15. Let script be the callback context associated with V.
    16. -
    17. Push script on to the stack of incumbent scripts. [HTML]
    18. -
    19. Let R be an uninitialized variable.
    20. -
    21. Try running the following step: -
        -
      1. Set R to the result of invoking the [[Call]] method of X, providing this as the this value and arg0..n−1 as the argument values.
      2. -
      - And then, whether or not an exception was thrown: -
        -
      1. Pop script off the stack of incumbent scripts.
      2. -
      3. If an exception was thrown, end these steps, and allow it to propagate.
      4. -
      -
    22. -
    23. If the operation’s return type is void, return.
    24. -
    25. - Return the result of converting - R to an IDL value of the same type as the operation’s return type. -
    26. -
    - And then, if an exception was thrown: -
      -
    1. If the operation has a return type that is a promise type, then: -
        -
      1. Let reject be the initial value of %Promise%.reject.
      2. -
      3. Return the result of calling reject with %Promise% as the this object and the exception as the single argument value.
      4. -
      -
    2. -
    3. Otherwise, end these steps and allow the exception to propagate.
    4. -
    -
  2. -
-

- Note that ECMAScript objects need not have - properties corresponding to constants - on them to be considered as user objects - implementing interfaces that happen - to have constants declared on them. -

-

- The value of a user object’s - attribute is retrieved using the - following algorithm: -

-
    -
  1. Try running the following steps: -
      -
    1. Let V be the IDL callback interface type value - that represents the user object implementing the interface.
    2. -
    3. Let O be the ECMAScript object corresponding to V.
    4. -
    5. Let P be the identifier of the attribute.
    6. -
    7. Let script be the callback context associated with V.
    8. -
    9. Push script on to the stack of incumbent scripts. [HTML]
    10. -
    11. Let R be an uninitialized variable.
    12. -
    13. Try running the following step: -
        -
      1. Set R to the result of invoking the [[Get]] method of O with property name P.
      2. -
      - And then, whether or not an exception was thrown: -
        -
      1. Pop script off the stack of incumbent scripts.
      2. -
      3. If an exception was thrown, end these steps, and allow it to propagate.
      4. -
      -
    14. -
    15. Return the result of converting R to an IDL value of the same type as the attribute’s type.
    16. -
    - And then, if an exception was thrown: -
      -
    1. If the attribute’s type is a promise type, then: -
        -
      1. Let reject be the initial value of %Promise%.reject.
      2. -
      3. Return the result of calling reject with %Promise% as the this object and the exception as the single argument value.
      4. -
      -
    2. -
    3. Otherwise, end these steps and allow the exception to propagate.
    4. -
    -
  2. -
-

- The value of a user object’s - attribute is set using the - following algorithm: -

-
    -
  1. Let V be the IDL callback interface type value - that represents the user object implementing the interface.
  2. -
  3. Let O be the ECMAScript object corresponding to V.
  4. -
  5. Let P be the identifier of the attribute.
  6. -
  7. Let A be the IDL value to be assigned to the attribute.
  8. -
  9. Let W be the result of converting A to an ECMAScript value.
  10. -
  11. Let script be the callback context associated with V.
  12. -
  13. Push script on to the stack of incumbent scripts. [HTML]
  14. -
  15. Try running the following step: -
      -
    1. Invoke the [[Put]] method of O with property name P and value W.
    2. -
    - And then, whether or not an exception was thrown: -
      -
    1. Pop script off the stack of incumbent scripts.
    2. -
    3. If an exception was thrown, end these steps, and allow it to propagate.
    4. -
    -
  16. -
-
- -
-

4.9 Invoking callback functions

- -

- An ECMAScript callable object that is being - used as a callback function value is - called in a manner similar to how operations - on user objects are called (as - described in the previous section). The callable object - is called with a list of values - arg0..n−1, - each of which is either an IDL value or the special value “missing” (representing - a missing optional argument), by - following the algorithm below. By default, the callback this value - when invoking a callback function - is undefined, unless overridden by other specifications. -

-
    -
  1. Try running the following steps: -
      -
    1. Let V be the IDL callback function type value.
    2. -
    3. Let F be the ECMAScript object corresponding to V.
    4. -
    5. Let R be an uninitialized variable.
    6. -
    7. If IsCallable(F) is false, then set R to the value undefined.
    8. -
    9. Otherwise, -
        -
      1. Initialize values to be an empty list of ECMAScript values.
      2. -
      3. Initialize count to 0.
      4. -
      5. Initialize i to 0.
      6. -
      7. While i < n: -
          -
        1. If argi is the special value “missing”, then append to values the ECMAScript undefined value.
        2. -
        3. Otherwise, argi is an IDL value. Append to values the result of - converting argi to an ECMAScript value, - and set count to i + 1.
        4. -
        5. Set i to i + 1.
        6. -
        -
      8. -
      9. Truncate values to have length count.
      10. -
      11. Let script be the callback context associated with V.
      12. -
      13. Push script on to the stack of incumbent scripts. [HTML]
      14. -
      15. Try running the following step: -
          -
        1. Set R to the result of invoking the [[Call]] method of F, providing the callback this value as the this value and values as the argument values.
        2. -
        - And then, whether or not an exception was thrown: -
          -
        1. Pop script off the stack of incumbent scripts.
        2. -
        3. If an exception was thrown, end these steps, and allow it to propagate.
        4. -
        -
      16. -
      -
    10. -
    11. If the callback function’s return type is void, return.
    12. -
    13. - Return the result of converting - R to an IDL value of the same type as the callback function’s return type. -
    14. -
    - And then, if an exception was thrown: -
      -
    1. If the callback function has a return type that is a promise type, then: -
        -
      1. Let reject be the initial value of %Promise%.reject.
      2. -
      3. Return the result of calling reject with %Promise% as the this object and the exception as the single argument value.
      4. -
      -
    2. -
    3. Otherwise, end these steps and allow the exception to propagate.
    4. -
    -
  2. -
-
- -
-

4.10 Exceptions

- -

- There MUST exist a property on the ECMAScript global object - whose name is “DOMException” and value is an object called the - DOMException constructor object, - which provides access to legacy DOMException code constants and allows construction of - DOMException instances. - The property has the attributes { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true }. -

- -
-

4.10.1 DOMException constructor object

- -

- The DOMException constructor object MUST be a function object - but with a [[Prototype]] value of %Error% ( - [ECMA-262] - , section 6.1.7.4). -

-

- For every legacy code listed in the error names table, - there MUST be a property on the DOMException constructor object - whose name and value are as indicated in the table. The property has - attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }. -

-

- The DOMException constructor object MUST also have a property named - “prototype” with attributes - { [[Writable]]: false, [[Enumerable]]: false, [[Configurable]]: false } - whose value is an object called the DOMException prototype object. - This object also provides access to the legacy code values. -

- -
-
4.10.1.1 DOMException(message, name)
- -

When the DOMException function is called with arguments message and name, the following steps are taken:

- -
    -
  1. Let F be the active function object.
  2. -
  3. If NewTarget is undefined, let newTarget be F, else let newTarget be NewTarget.
  4. -
  5. Let super be F.[[GetPrototypeOf]]().
  6. -
  7. ReturnIfAbrupt(super).
  8. -
  9. If IsConstructor(super) is false, throw a TypeError exception.
  10. -
  11. Let O be Construct(super, «message», newTarget).
  12. -
  13. If name is not undefined, then -
      -
    1. Let name be ToString(name).
    2. -
    3. Let status be DefinePropertyOrThrow(O, "name", PropertyDescriptor{[[Value]]: name, [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true}).
    4. -
    5. ReturnIfAbrupt(status).
    6. -
    7. Let code be the legacy code indicated in the error names table for error name name, or 0 if there is none.
    8. -
    9. Let status be DefinePropertyOrThrow(O, "code", PropertyDescriptor{[[Value]]: code, [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true}).
    10. -
    11. ReturnIfAbrupt(status).
    12. -
    -
  14. -
  15. Return O.
  16. -
-
-
- -
-

4.10.2 DOMException prototype object

- -

- The DOMException prototype object MUST - have an internal [[Prototype]] property whose value is %ErrorPrototype% ( - [ECMA-262] - , section 6.1.7.4). -

-

- The class string of the - DOMException prototype object - is “DOMExceptionPrototype”. -

-

- There MUST be a property named “constructor” - on the DOMException prototype object with attributes - { [[Writable]]: true, [[Enumerable]]: false, [[Configurable]]: true } - and whose value is the DOMException constructor object. -

-

- For every legacy code listed in the error names table, - there MUST be a property on the DOMException prototype object - whose name and value are as indicated in the table. The property has - attributes { [[Writable]]: false, [[Enumerable]]: true, [[Configurable]]: false }. -

-
-
- -
-

4.11 Exception objects

- -

- Simple exceptions are represented - by native ECMAScript objects of the corresponding type. -

-

- DOMExceptions are represented by - platform objects that inherit from - the DOMException prototype object. -

-

- Every platform object representing a DOMException in ECMAScript is associated with a global environment, just - as the initial objects are. - When an exception object is created by calling the DOMException constructor object, - either normally or as part of a new expression, then the global environment - the newly created object is associated with MUST be the same as for the - DOMException constructor object itself. -

-

- The value of the internal [[Prototype]] - property of a DOMException - object MUST be the DOMException prototype object - from the global environment the exception object is associated with. -

-

- The class string - of a DOMException object - MUST be “DOMException”. -

-
Note
-

The intention is for DOMException objects to be just like the other - various native Error objects that the - ECMAScript specification defines, apart from responding differently - to being passed to Object.prototype.toString and it having a “code” property. - If an implementation places non-standard properties on native - Error objects, exposing for example - stack traces or error line numbers, then these ought to be exposed - on exception objects too.

-
-
- -
-

4.12 Creating and throwing exceptions

- -

- First, we define the current global environment - as the result of running the following algorithm: -

-
    -
  1. - Let F be the Function object used - as the this value in the top-most call - on the ECMAScript call stack where F corresponds to an IDL - attribute, - operation, - indexed property, - named property, - constructor, - named constructor, - stringifier, - exception constructor or - exception field getter. -
  2. -
  3. - If F corresponds to an attribute, operation or stringifier, then return - the global environment associated with the - interface that definition appears on. -
  4. -
  5. - Otherwise, if F corresponds to an indexed or named property, then return - the global environment associated with the interface that - the indexed or named property getter, setter or deleter was defined on. -
  6. -
  7. - Otherwise, if F is a named constructor for an interface, or is - an interface object for an - interface that is a constructor, then return the global environment - associated with that interface. -
  8. -
  9. - Otherwise, if F is an exception field getter, then return - the global environment associated with the exception on which the - exception field was defined. -
  10. -
  11. - Otherwise, F is an exception interface object that is a constructor. - Return the global environment associated with that exception. -
  12. -
-

- When a simple exception or - DOMException - E is to be created, - with error name N and - optional user agent-defined message M, - the following steps MUST be followed: -

-
    -
  1. If M was not specified, let M be undefined. Otherwise, let it be the result of converting M to a String value.
  2. -
  3. Let N be the result of converting N to a String value.
  4. -
  5. Let args be a list of ECMAScript values. -
    -
    E is DOMException
    -
    args is (undefined, N).
    -
    E is a simple exception
    -
    args is (M)
    -
    -
  6. -
  7. Let G be the current global environment.
  8. -
  9. Let X be an object determined based on the type of E: -
    -
    E is DOMException
    -
    X is the DOMException constructor object - from the global environment G.
    -
    E is a simple exception
    -
    X is the constructor for the corresponding ECMAScript error - from the global environment G.
    -
    -
  10. -
  11. Let O be the result of calling X as a function - with args as the argument list.
  12. -
  13. Return O.
  14. -
-

- When a simple exception or - DOMException - E is to be thrown, - with error name N and - optional user agent-defined message M, - the following steps MUST be followed: -

-
    -
  1. Let O be the result of creating - the specified exception E with error name N and - optional user agent-defined message M.
  2. -
  3. Throw O.
  4. -
-
Note
-

- The above algorithms do not restrict platform objects representing exceptions - propagating out of a Function to be - ones that are associated with the global environment - where that Function object originated. - For example, consider the IDL: -

-
IDL
interface A {
-
-  /**
-   * Calls computeSquareRoot on m, passing x as its argument.
-   */
-  double doComputation(MathUtils m, double x);
-};
-
-interface MathUtils {
-  /**
-   * If x is negative, throws a NotSupportedError.  Otherwise, returns
-   * the square root of x.
-   */
-  double computeSquareRoot(double x);
-};
-

- If we pass a MathUtils object from - a different global environment to doComputation, then the exception - thrown will be from that global environment: -

-
ECMAScript
var a = getA();                           // An A object from this global environment.
-var m = otherWindow.getMathUtils();       // A MathUtils object from a different global environment.
-
-a instanceof Object;                      // Evaluates to true.
-m instanceof Object;                      // Evaluates to false.
-m instanceof otherWindow.Object;          // Evaluates to true.
-
-try {
-  a.doComputation(m, -1);
-} catch (e) {
-  e instanceof DOMException;              // Evaluates to false.
-  e instanceof otherWindow.DOMException;  // Evaluates to true.
-}
-
-

- Any requirements in this document to throw an instance of an ECMAScript built-in - Error MUST use - the built-in from the current global environment. -

-
- -
-

4.13 Handling exceptions

- -

- None of the algorithms or processing requirements in the - ECMAScript language binding catch ECMAScript exceptions. Whenever - an ECMAScript Function is invoked due - to requirements in this section and that Function - ends due to an exception being thrown, that exception - MUST propagate to the caller, and if - not caught there, to its caller, and so on. -

-
Example
-

- The following IDL fragment - defines two interfaces. - The valueOf attribute on ExceptionThrower - is defined to throw an exception whenever an attempt is made - to get its value. -

-
IDL
interface Dahut {
-  attribute DOMString type;
-};
-
-interface ExceptionThrower {
-  // This attribute always throws a NotSupportedError and never returns a value.
-  attribute long valueOf;
-};
-

- Assuming an ECMAScript implementation supporting this interface, - the following code demonstrates how exceptions are handled: -

-
ECMAScript
var d = getDahut();              // Obtain an instance of Dahut.
-var et = getExceptionThrower();  // Obtain an instance of ExceptionThrower.
-
-try {
-  d.type = { toString: function() { throw "abc"; } };
-} catch (e) {
-  // The string "abc" is caught here, since as part of the conversion
-  // from the native object to a string, the anonymous function
-  // was invoked, and none of the [[DefaultValue]], ToPrimitive or
-  // ToString algorithms are defined to catch the exception.
-}
-
-try {
-  d.type = { toString: { } };
-} catch (e) {
-  // An exception is caught here, since an attempt is made to invoke
-  // [[Call]] on the native object that is the value of toString
-  // property.
-}
-
-d.type = et;
-// An uncaught NotSupportedError DOMException is thrown here, since the
-// [[DefaultValue]] algorithm attempts to get the value of the
-// "valueOf" property on the ExceptionThrower object.  The exception
-// propagates out of this block of code.
-
-
-
- -
-

5. Common definitions

- -

- This section specifies some common definitions that all - conforming implementations - MUST support. -

- -
-

5.1 ArrayBufferView

- -
IDL
typedef (Int8Array or Int16Array or Int32Array or
-         Uint8Array or Uint16Array or Uint32Array or Uint8ClampedArray or
-         Float32Array or Float64Array or DataView) ArrayBufferView;
-

- The ArrayBufferView typedef is used to represent - objects that provide a view on to an ArrayBuffer. -

-
- -
-

5.2 BufferSource

- -
IDL
typedef (ArrayBufferView or ArrayBuffer) BufferSource;
-

- The BufferSource typedef is used to represent objects - that are either themselves an ArrayBuffer or which - provide a view on to an ArrayBuffer. -

-
- -
-

5.3 DOMTimeStamp

- -
IDL
typedef unsigned long long DOMTimeStamp;
-

- The DOMTimeStamp type is used for representing - a number of milliseconds, either as an absolute time (relative to some epoch) - or as a relative amount of time. Specifications that use this type will need - to define how the number of milliseconds is to be interpreted. -

-
- -
-

5.4 Function

- -
IDL
callback Function = any (any... arguments);
-

- The Function callback function - type is used for representing function values with no restriction on what arguments - are passed to it or what kind of value is returned from it. -

-
- -
-

5.5 VoidFunction

- -
IDL
callback VoidFunction = void ();
-

- The VoidFunction callback function - type is used for representing function values that take no arguments and do not - return any value. -

-
-
- -
-

6. Extensibility

- -

This section is informative.

- -

- Extensions to language binding requirements can be specified - using extended attributes - that do not conflict with those defined in this document. Extensions for - private, project-specific use should not be included in - IDL fragments - appearing in other specifications. It is recommended that extensions - that are required for use in other specifications be coordinated - with the group responsible for work on Web IDL, which - at the time of writing is the - W3C Web Platform Working Group, - for possible inclusion in a future version of this document. -

-

- Extensions to any other aspect of the IDL language are - strongly discouraged. -

-
- -
-

7. Referencing this specification

- -

This section is informative.

- -

- It is expected that other specifications that define Web platform interfaces - using one or more IDL fragments - will reference this specification. It is suggested - that those specifications include a sentence such as the following, - to indicate that the IDL is to be interpreted as described in this - specification: -

-
-

- The IDL fragment in Appendix A of this specification must, in conjunction - with the IDL fragments defined in this specification's normative references, - be interpreted as required for conforming sets of IDL fragments, as described in the - “Web IDL” specification. [WEBIDL] -

-
-

- In addition, it is suggested that the conformance class for user - agents in referencing specifications be linked to the - conforming - implementation class from this specification: -

-
-

- A conforming FooML user agent must also be a - conforming implementation of the IDL fragment in Appendix A - of this specification, as described in the - “Web IDL” specification. [WEBIDL] -

-
-
- -
-

8. Acknowledgements

- -

This section is informative.

- -

- The editor would like to thank the following people for contributing - to this specification: - Glenn Adams, - David Andersson, - L. David Baron, - Art Barstow, - Nils Barth, - Robin Berjon, - David Bruant, - Jan-Ivar Bruaroey, - Marcos Cáceres, - Giovanni Campagna, - Domenic Denicola, - Michael Dyck, - Brendan Eich, - João Eiras, - Gorm Haug Eriksen, - Sigbjorn Finne, - David Flanagan, - Dimitry Golubovsky, - James Graham, - Aryeh Gregor, - Kartikaya Gupta, - Marcin Hanclik, - Jed Hartman, - Stefan Haustein, - Dominique Hazaël-Massieux, - Ian Hickson, - Björn Höhrmann, - Kyle Huey, - Lachlan Hunt, - Oliver Hunt, - Jim Jewett, - Wolfgang Keller, - Anne van Kesteren, - Olav Junker Kjær, - Magnus Kristiansen, - Travis Leithead, - Jim Ley, - Kevin Lindsey, - Jens Lindström, - Peter Linss, - 呂康豪 (Kang-Hao Lu), - Kyle Machulis, - Mark Miller, - Ms2ger, - Andrew Oakley, - 岡坂 史紀 (Shiki Okasaka), - Jason Orendorff, - Olli Pettay, - Simon Pieters, - Andrei Popescu, - François Remy, - Tim Renouf, - Alex Russell, - Takashi Sakamoto, - Doug Schepers, - Jonas Sicking, - Garrett Smith, - Geoffrey Sneddon, - Jungkee Song, - Josh Soref, - Maciej Stachowiak, - Anton Tayanovskyy, - Peter Van der Beken, - Jeff Walden, - Allen Wirfs-Brock, - Jeffrey Yasskin and - Collin Xu. -

-

- Special thanks also go to Sam Weinig for maintaining this document - while the editor was unavailable to do so. -

-
- - -

A. IDL grammar

- This section defines an LL(1) grammar whose start symbol, - Definitions, matches an - entire IDL fragment. -

- Each production in the grammar has on its right hand side either a - non-zero sequence of terminal and non-terminal symbols, or an - epsilon (ε) which indicates no symbols. Symbols that begin with - an uppercase letter are non-terminal symbols. Symbols within quotes - are terminal symbols that are matched with the exact text between - the quotes. Symbols that begin with a lowercase letter are terminal - symbols that are matched by the regular expressions (using Perl 5 regular - expression syntax [PERLRE]) as follows: -

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
integer=/-?([1-9][0-9]*|0[Xx][0-9A-Fa-f]+|0[0-7]*)/
float=/-?(([0-9]+\.[0-9]*|[0-9]*\.[0-9]+)([Ee][+-]?[0-9]+)?|[0-9]+[Ee][+-]?[0-9]+)/
identifier=/_?[A-Za-z][0-9A-Z_a-z-]*/
string=/"[^"]*"/
whitespace=/[\t\n\r ]+/
comment=/\/\/.*|\/\*(.|\n)*?\*\//
other=/[^\t\n\r 0-9A-Za-z]/

- The tokenizer operates on a sequence of Unicode characters - [UNICODE]. - When tokenizing, the longest possible match MUST be used. For example, if the input - text is “a1”, it is tokenized as a single identifier, - and not as a separate identifier and integer. - If the longest possible match could match one of the above named terminal symbols or - one of the quoted terminal symbols from the grammar, it MUST be tokenized as the quoted - terminal symbol. Thus, the input text “long” is tokenized as the quoted terminal symbol - "long" rather than an identifier called “long”, - and “.” is tokenized as the quoted terminal symbol - "." rather than an other. -

- The IDL syntax is case sensitive, both for the quoted terminal symbols - used in the grammar and the values used for - identifier terminals. Thus, for - example, the input text “Const” is tokenized as - an identifier rather than the quoted - terminal symbol "const", an - interface with - identifier - “A” is distinct from one named “a”, and an - extended attribute - [constructor] will not be recognized as - the [Constructor] - extended attribute. -

- Implicitly, any number of whitespace and - comment terminals are allowed between every other terminal - in the input text being parsed. Such whitespace and - comment terminals are ignored while parsing. -

- The following LL(1) grammar, starting with Definitions, - matches an IDL fragment: -

[1]DefinitionsExtendedAttributeList Definition Definitions
 | - ε
[2]DefinitionCallbackOrInterface
 | - Partial
 | - Dictionary
 | - Enum
 | - Typedef
 | - ImplementsStatement
[3]CallbackOrInterface"callback" CallbackRestOrInterface
 | - Interface
[4]CallbackRestOrInterfaceCallbackRest
 | - Interface
[5]Interface"interface" identifier Inheritance "{" InterfaceMembers "}" ";"
[6]Partial"partial" PartialDefinition
[7]PartialDefinitionPartialInterface
 | - PartialDictionary
[8]PartialInterface"interface" identifier "{" InterfaceMembers "}" ";"
[9]InterfaceMembersExtendedAttributeList InterfaceMember InterfaceMembers
 | - ε
[10]InterfaceMemberConst
 | - Operation
 | - Serializer
 | - Stringifier
 | - StaticMember
 | - Iterable
 | - ReadOnlyMember
 | - ReadWriteAttribute
[11]Dictionary"dictionary" identifier Inheritance "{" DictionaryMembers "}" ";"
[12]DictionaryMembersExtendedAttributeList DictionaryMember DictionaryMembers
 | - ε
[13]DictionaryMemberRequired Type identifier Default ";"
[14]Required"required"
 | - ε
[15]PartialDictionary"dictionary" identifier "{" DictionaryMembers "}" ";"
[16]Default"=" DefaultValue
 | - ε
[17]DefaultValueConstValue
 | - string
 | - "[" "]"
[18]Inheritance":" identifier
 | - ε
[19]Enum"enum" identifier "{" EnumValueList "}" ";"
[20]EnumValueListstring EnumValueListComma
[21]EnumValueListComma"," EnumValueListString
 | - ε
[22]EnumValueListStringstring EnumValueListComma
 | - ε
[23]CallbackRestidentifier "=" ReturnType "(" ArgumentList ")" ";"
[24]Typedef"typedef" Type identifier ";"
[25]ImplementsStatementidentifier "implements" identifier ";"
[26]Const"const" ConstType identifier "=" ConstValue ";"
[27]ConstValueBooleanLiteral
 | - FloatLiteral
 | - integer
 | - "null"
[28]BooleanLiteral"true"
 | - "false"
[29]FloatLiteralfloat
 | - "-Infinity"
 | - "Infinity"
 | - "NaN"
[30]Serializer"serializer" SerializerRest
[31]SerializerRestOperationRest
 | - "=" SerializationPattern ";"
 | - ";"
[32]SerializationPattern"{" SerializationPatternMap "}"
 | - "[" SerializationPatternList "]"
 | - identifier
[33]SerializationPatternMap"getter"
 | - "inherit" Identifiers
 | - identifier Identifiers
 | - ε
[34]SerializationPatternList"getter"
 | - identifier Identifiers
 | - ε
[35]Stringifier"stringifier" StringifierRest
[36]StringifierRestReadOnly AttributeRest
 | - ReturnType OperationRest
 | - ";"
[37]StaticMember"static" StaticMemberRest
[38]StaticMemberRestReadOnly AttributeRest
 | - ReturnType OperationRest
[39]ReadOnlyMember"readonly" ReadOnlyMemberRest
[40]ReadOnlyMemberRestAttributeRest
[41]ReadWriteAttribute"inherit" ReadOnly AttributeRest
 | - AttributeRest
[42]AttributeRest"attribute" Type AttributeName ";"
[43]AttributeNameAttributeNameKeyword
 | - identifier
[44]AttributeNameKeyword"required"
[45]Inherit"inherit"
 | - ε
[46]ReadOnly"readonly"
 | - ε
[47]OperationReturnType OperationRest
 | - SpecialOperation
[48]SpecialOperationSpecial Specials ReturnType OperationRest
[49]SpecialsSpecial Specials
 | - ε
[50]Special"getter"
 | - "setter"
 | - "deleter"
 | - "legacycaller"
[51]OperationRestOptionalIdentifier "(" ArgumentList ")" ";"
[52]OptionalIdentifieridentifier
 | - ε
[53]ArgumentListArgument Arguments
 | - ε
[54]Arguments"," Argument Arguments
 | - ε
[55]ArgumentExtendedAttributeList OptionalOrRequiredArgument
[56]OptionalOrRequiredArgument"optional" Type ArgumentName Default
 | - Type Ellipsis ArgumentName
[57]ArgumentNameArgumentNameKeyword
 | - identifier
[58]Ellipsis"..."
 | - ε
[59]Iterable"iterable" "<" Type OptionalType ">" ";"
[60]OptionalType"," Type
 | - ε
[65]ExtendedAttributeList"[" ExtendedAttribute ExtendedAttributes "]"
 | - ε
[66]ExtendedAttributes"," ExtendedAttribute ExtendedAttributes
 | - ε
[67]ExtendedAttribute - "(" ExtendedAttributeInner ")" ExtendedAttributeRest -
 | - "[" ExtendedAttributeInner "]" ExtendedAttributeRest -
 | - "{" ExtendedAttributeInner "}" ExtendedAttributeRest -
 | - Other ExtendedAttributeRest -
[68]ExtendedAttributeRestExtendedAttribute
 | - ε
[69]ExtendedAttributeInner - "(" ExtendedAttributeInner ")" ExtendedAttributeInner -
 | - "[" ExtendedAttributeInner "]" ExtendedAttributeInner -
 | - "{" ExtendedAttributeInner "}" ExtendedAttributeInner -
 | - OtherOrComma ExtendedAttributeInner -
 | - ε -
[70]Other - integer
 | - float
 | - identifier
 | - string
 | - other -
 | - "-"
 | - "-Infinity"
 | - "."
 | - "..."
 | - ":"
 | - ";"
 | - "<"
 | - "="
 | - ">"
 | - "?" -
 | - "ByteString"
 | - "DOMString"
 | - "Infinity"
 | - "NaN"
 | - "USVString"
 | - "any"
 | - "boolean"
 | - "byte"
 | - "double"
 | - "false"
 | - "float" -
 | - "long"
 | - "null"
 | - "object"
 | - "octet"
 | - "or"
 | - "optional"
 | - "sequence" -
 | - "short"
 | - "true"
 | - "unsigned"
 | - "void" -
 | - ArgumentNameKeyword -
 | - BufferRelatedType -
[71]ArgumentNameKeyword - "attribute"
 | - "callback"
 | - "const"
 | - "deleter"
 | - "dictionary" -
 | - "enum"
 | - "getter"
 | - "implements"
 | - "inherit"
 | - "interface"
 | - "iterable" -
 | - "legacycaller"
 | - "partial"
 | - "required"
 | - "serializer"
 | - "setter"
 | - "static"
 | - "stringifier"
 | - "typedef" -
 | - "unrestricted" -
[72]OtherOrCommaOther
 | - ","
[73]TypeSingleType
 | - UnionType Null
[74]SingleTypeNonAnyType
 | - "any"
[75]UnionType"(" UnionMemberType "or" UnionMemberType UnionMemberTypes ")"
[76]UnionMemberTypeNonAnyType
 | - UnionType Null
[77]UnionMemberTypes"or" UnionMemberType UnionMemberTypes
 | - ε
[78]NonAnyTypePrimitiveType Null
 | - PromiseType Null
 | - "ByteString" Null
 | - "DOMString" Null
 | - "USVString" Null
 | - identifier Null
 | - "sequence" "<" Type ">" Null
 | - "object" Null
 | - "Error" Null
 | - "DOMException" Null
 | - BufferRelatedType Null
[79]BufferRelatedType"ArrayBuffer"
 | - "DataView"
 | - "Int8Array"
 | - "Int16Array"
 | - "Int32Array"
 | - "Uint8Array"
 | - "Uint16Array"
 | - "Uint32Array"
 | - "Uint8ClampedArray"
 | - "Float32Array"
 | - "Float64Array"
[80]ConstTypePrimitiveType Null
 | - identifier Null
[81]PrimitiveTypeUnsignedIntegerType
 | - UnrestrictedFloatType
 | - "boolean"
 | - "byte"
 | - "octet"
[82]UnrestrictedFloatType"unrestricted" FloatType
 | - FloatType
[83]FloatType"float"
 | - "double"
[84]UnsignedIntegerType"unsigned" IntegerType
 | - IntegerType
[85]IntegerType"short"
 | - "long" OptionalLong
[86]OptionalLong"long"
 | - ε
[87]PromiseType"Promise" "<" ReturnType ">"
[88]Null"?"
 | - ε
[89]ReturnTypeType
 | - "void"
[90]IdentifierListidentifier Identifiers
[91]Identifiers"," identifier Identifiers
 | - ε
[92]ExtendedAttributeNoArgsidentifier
[93]ExtendedAttributeArgListidentifier "(" ArgumentList ")"
[94]ExtendedAttributeIdentidentifier "=" identifier
[95]ExtendedAttributeIdentListidentifier "=" "(" IdentifierList ")"
[96]ExtendedAttributeNamedArgListidentifier "=" identifier "(" ArgumentList ")"
Note
-

- The Other - non-terminal matches any single terminal symbol except for - "(", ")", - "[", "]", - "{", "}" - and ",". -

-

- While the ExtendedAttribute - non-terminal matches any non-empty sequence of terminal symbols (as long as any - parentheses, square brackets or braces are balanced, and the - "," token appears only within those balanced brackets), - only a subset of those - possible sequences are used by the extended attributes - defined in this specification — see - section 3.11 - for the syntaxes that are used by these extended attributes. -

- +
+ [CONTENT] +
-

B. References

B.1 Normative references

[ECMA-262]
Ecma International. ECMAScript Language Specification. URL: https://tc39.github.io/ecma262/ -
[IEEE-754]
ANSI/IEEE. IEEE Standard for Floating-Point Arithmetic. 03 September 2008. URL: http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=4610935 -
[PERLRE]
The Perl Foundation. Perl Regular Expressions (Perl 5.8.8). January 2006. URL: http://search.cpan.org/dist/perl/pod/perlre.pod -
[RFC2119]
S. Bradner. IETF. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 -
[RFC2781]
P. Hoffman; F. Yergeau. IETF. UTF-16, an encoding of ISO 10646. February 2000. Informational. URL: https://tools.ietf.org/html/rfc2781 -
[RFC3629]
F. Yergeau. IETF. UTF-8, a transformation format of ISO 10646. November 2003. Internet Standard. URL: https://tools.ietf.org/html/rfc3629 -
[TYPEDARRAYS]
V. Vukicevic; K. Russell. The Khronos Group. Typed Array Specification. 8 February 2011. URL: https://www.khronos.org/registry/typedarray/specs/1.0/ -
[UNICODE]
The Unicode Consortium. The Unicode Standard. URL: http://www.unicode.org/versions/latest/ -

B.2 Informative references

[DOM]
Anne van Kesteren; Aryeh Gregor; Ms2ger; Alex Russell; Robin Berjon. W3C. W3C DOM4. 19 November 2015. W3C Recommendation. URL: http://www.w3.org/TR/dom/ -
[DOM3CORE]
Arnaud Le Hors; Philippe Le Hégaret; Lauren Wood; Gavin Nicol; Jonathan Robie; Mike Champion; Steven B Byrne et al. W3C. Document Object Model (DOM) Level 3 Core Specification. 7 April 2004. W3C Recommendation. URL: http://www.w3.org/TR/DOM-Level-3-Core/ -
[HTML]
Ian Hickson. WHATWG. HTML Standard. Living Standard. URL: https://html.spec.whatwg.org/multipage/ -
[HTML5]
Ian Hickson; Robin Berjon; Steve Faulkner; Travis Leithead; Erika Doyle Navara; Edward O'Connor; Silvia Pfeiffer. W3C. HTML5. 28 October 2014. W3C Recommendation. URL: http://www.w3.org/TR/html5/ -
[OMGIDL]
Object Management Group. CORBA 3.1 – OMG IDL Syntax and Semantics chapter. January 2008. URL: http://www.omg.org/cgi-bin/doc?formal/08-01-04.pdf -
[WEBIDL]
WebIDL. URL: https://heycam.github.io/webidl/ -
[XMLNS]
Tim Bray; Dave Hollander; Andrew Layman; Richard Tobin; Henry Thompson et al. W3C. Namespaces in XML 1.0 (Third Edition). 8 December 2009. W3C Recommendation. URL: http://www.w3.org/TR/xml-names -
+ diff --git a/test/docs/metadata/csvw-ucr.html b/test/docs/metadata/csvw-ucr.html index 7271b621f..739fd30ef 100644 --- a/test/docs/metadata/csvw-ucr.html +++ b/test/docs/metadata/csvw-ucr.html @@ -531,3909 +531,9 @@

Table of Contents

- -
-

1. Introduction

-

A large percentage of the data published on the Web is tabular data, commonly published as - comma separated values (CSV) files. CSV files may be of a significant size but they can be - generated and manipulated easily, and there is a significant body of software available to - handle them. Indeed, popular spreadsheet applications (Microsoft Excel, iWork’s Numbers, or - OpenOffice.org) as well as numerous other applications can produce and consume these files. - However, although these tools make conversion to CSV easy, it is resisted by some publishers - because CSV is a much less rich format that can't express important detail that the - publishers want to express, such as annotations, the meaning of identifier codes etc.

-

Existing formats for tabular data are format-oriented and hard to process (e.g. Excel); - un-extensible (e.g. CSV/TSV); or they assume the use of particular technologies (e.g. SQL - dumps). None of these formats allow developers to pull in multiple data sets, manipulate, - visualize and combine them in flexible ways. Other information relevant to these datasets, - such as access rights and provenance, is not easy to find. CSV is a very useful and simple - format, but to unlock the data and make it portable to environments other than the one in - which it was created, there needs to be a means of encoding and associating relevant - metadata.

-

To address these issues, the CSV on the Web Working Group seeks to provide:

-
    -
  • Metadata vocabulary for CSV data
  • -
  • Access methods for CSV Metadata
  • -
  • Mapping mechanism for transforming CSV into various formats (e.g., RDF [rdf11-concepts], JSON [RFC7159], or XML [xml])
  • -
-

In order to determine the scope of and elicit the requirements for this extended - CSV format (CSV+) a set of use cases have been compiled. Each use case provides a narrative - describing how a representative user works with tabular data to achieve their goal, - supported, where possible, with example datasets. The use cases observe existing common - practice undertaken when working with tabular data, often illustrating shortcomings or - limitations of existing formats or technologies. It is anticipated that the additional - metadata provided within the CSV+ format, when coupled with metadata-aware tools, will - simplify how users work with tabular data. As a result, the use cases seek to identify where - user effort may be reduced.

-

A set of requirements, used to guide the development of the CSV+ specification, have been - derived from the compiled use cases.

-
-
-

2. Use Cases

-

The use cases below describe many applications of tabular data. Whilst there are many - different variations of tabular data, all the examples conform to the definition of - tabular data defined in the Model for Tabular Data and Metadata on the Web [tabular-data-model]:

-

Tabular data is data that is structured into rows, each of which - contains information about some thing. Each row contains the same number of fields - (although some of these fields may be empty), which provide values of properties of - the thing described by the row. In tabular data, fields within the same column provide - values for the same property of the thing described by the particular row.

-

In selecting the use cases we have reviewed a number of row oriented data - formats that, at first glance, appear to be tabular data. However, closer inspection - indicates that one or other of the characteristics of tabular data were not present. - For example, the HL7 format, - from the health informatics domain defines a separate schema for each row (known as - a "segment" in that format) which means that HL7 messages do not have a regular - number of columns for each row.

-
-

2.1 Use Case #1 - Digital preservation of government records

-

- (Contributed by Adam Retter; supplemental information about use of XML provided by Liam Quin) -

-

The laws of England and Wales place obligations upon departments and The National Archives for the collection, - disposal and preservation of records. Government departments are obliged within the Public Records Act 1958 sections 3, 4 and 5 to select, transfer, preserve and make - available those records that have been defined as public records. These obligations apply - to records in all formats and media, including paper and digital records. Details - concerning the selection and transfer of records can be found here.

-

Departments transferring records to TNA must catalogue or list the selected records - according to The National Archives' defined cataloguing principles and standards. - Cataloguing is the process of writing a description, or Transcriptions of Records - for the records being transferred. Once each Transcription of Records is added to the - Records Catalogue, records can be subsequently discovered and accessed using the supplied - descriptions and titles.

-

TNA specifies what information should be provided within a Transcriptions of Records and - how that information should be formatted. A number of formats and syntaxes are supported, - including RDF. However, the predominant format used for the exchange of Transcriptions of - Records is CSV as the government departments providing the Records lack either the - technology or resources to provide metadata in the XML and RDF formats preferred by the - TNA.

-

A CSV-encoded Transcriptions of Records typically describes a set of Records, often - organised within a hierarchy. As a result, it is necessary to describe the - interrelationships between Records within a single CSV file.

-

Each row within a CSV file relates to a particular Record and is allocated a unique - identifier. This unique identifier behaves as a primary key for the Record within the - scope of the CSV file and is used when referencing that Record from within other Record - transcriptions. The unique identifier is unique within the scope of the datafile; in - order for the Record to be referenced from outside this datafile, the local identifier - must be mapped to a globally unique identifier such as a URI.

- -

- Requires: - PrimaryKey, - URIMapping and - ForeignKeyReferences. -

- -

Upon receipt by TNA, each of the Transcriptions of Records is validated against the (set - of) centrally published data definition(s); it is essential that received CSV metadata - comply with these specifications to ensure efficient and error free ingest into the - Records Catalogue.

-

The validation applied is dependent on the type of entity described in each row. Entity type - is specified in a specific column (e.g. type).

-

The data definition file, or CSV Schema, used by the CSV Validation Tool effectively - forms the basis of a formal contract between TNA and supplying organisations. For more - information on the CSV Validation Tool and CSV Schema developed by TNA please refer to the - online - documentation.

-

The CSV Validation Tool is written in Scala - version 2.10.

- -

- Requires: - WellFormedCsvCheck and - CsvValidation. -

- -

Following validation, the CSV-encoded Transcriptions of Records are transformed into RDF - for insertion into the triple store that underpins the Records Catalogue. The CSV is initially - - transformed into an interim XML format using XSLT and then processed further using a mix - of XSLT, Java and Scala to create RDF/XML. The CSV files do - not include all the information required to undertake the transformation, e.g. defining - which RDF properties are to be used when creating triples for the data value in each cell. - As a result, bespoke software has been created by TNA to supply the necessary additional - information during the CSV to RDF transformation process. The availability of generic - mechanisms to transform CSV to RDF would reduce the burden of effort within TNA when - working with CSV files.

- -

- Requires: - SyntacticTypeDefinition, SemanticTypeDefinition and CsvToRdfTransformation. -

- -

- In this particular case, RDF is the target format for the conversion of the CSV-encoded - Transcriptions of Records. However, the conversion of CSV to XML (in this case used as - an interim conversion step) is illustrative of a common data conversion workflow.

-

- The transformation outlined above is typical of common practice in that it uses a - freely-available XSLT transformation or XQuery parser (in this case - Andrew Welch's CSV to - XML converter in XSLT 2.0) which is then modified to meet the specific usage requirements.

-

The resulting XML document can then be further transformed using XSLT to create - XHTML documentation - perhaps including charts such as histograms to present summary data.

- -

- Requires: - CsvToXmlTransformation. -

- -
-
-

2.2 Use Case #2 - Publication of National Statistics

-

- (Contributed by Jeni Tennison) -

-

The Office for National Statistics (ONS) is the UK’s - largest independent producer of official statistics and is the recognised national - statistical institute for the UK. It is responsible for collecting and publishing - statistics related to the economy, population and society at national, regional and local - levels.

-

Sets of statistics are typically grouped together into datasets comprising of collections of - related tabular data. Within their underlying information systems, ONS maintains a clear - separation between the statistical data itself and the metadata required for interpretation. - ONS classify the metadata into two categories:

-
    -
  • structural metadata: dimensionality, sort order, axis metadata, axis ordering etc.
  • -
  • reference metadata: linked descriptive information.
  • -
-

These datasets are published on-line in both CSV format and as Microsoft - Excel Workbooks that have been manually assembled from the underlying data.

-

For example, dataset QS601EW Economic activity, derived from the - 2011 Census, is available as a precompiled Microsoft Excel Workbook for several sets of - administrative geographies, e.g. - 2011 Census: QS601EW Economic activity, local authorities in England and Wales, and in - CSV form via the ONS Data Explorer.

-

The ONS Data Explorer presents the user with a list of available datasets. A user may choose - to browse through the entire list or filter that list by topic. To enable the user to determine - whether or not a dataset meets their need, summary information is available for each dataset.

-

QS601EW Economic activity provides the following summary information:

-
    -
  • title: Economic activity
  • -
  • dimensions: Economic activity (T016A), 2011 Administrative Hierarchy, 2011 Westminster Parliamentary Constituency Hierarchy
  • -
  • dataset population: All usual residents aged 16 to 74
  • -
  • coverage: England and Wales
  • -
  • area types (list omitted here for brevity)
  • -
  • textual description of dataset
  • -
  • publication information
  • -
  • contact details
  • -
- -

- Requires: - AnnotationAndSupplementaryInfo. -

- -

Once the required dataset has been selected, the user is prompted to choose how they - would like the statistical data to be aggregated. In the case of QS601EW Economic - activity, the user is required to choose between the two mutually exclusive - geography types: 2011 Administrative Hierarchy and 2011 Westminster Parliamentary - Constituency Hierarchy. Effectively, the QS601EW Economic activity - dataset is partitioned into two separate tables for publication.

- -

- Requires: - GroupingOfMultipleTables. -

- -

The user is also provided with an option to sub-select only the elements of the - dataset that they deem pertinent for their needs. In the case of QS601EW Economic - activity the user may select data from up to 200 geographic areas within the - dataset to create a data subset that meets their needs. The data subset may be viewed - on-line (presented as an HTML table) or downloaded in CSV or Microsoft Excel formats.

- -

- Requires: - CsvAsSubsetOfLargerDataset. -

- -

An example extract of data for England and Wales in CSV form is provided below. - The data subset is provided as a compressed file containing both a CSV formatted data file - and a complementary html file containing the reference metadata. White space has been added - for clarity. File = - CSV_QS601EW2011WARDH_151277.zip

- -
Example 1
"QS601EW"
-"Economic activity"
-"19/10/13"
-
-               ,                 ,                                   "Count",                            "Count",                                   "Count",                                   "Count",                                                       "Count",                                                       "Count",                                                          "Count",                                                          "Count",                          "Count",                                 "Count",                              "Count",                         "Count",                                                        "Count",                                              "Count",                                            "Count",                       "Count"
-               ,                 ,                                  "Person",                           "Person",                                  "Person",                                  "Person",                                                      "Person",                                                      "Person",                                                         "Person",                                                         "Person",                         "Person",                                "Person",                             "Person",                        "Person",                                                       "Person",                                             "Person",                                           "Person",                      "Person"
-               ,                 ,               "Economic activity (T016A)",        "Economic activity (T016A)",               "Economic activity (T016A)",               "Economic activity (T016A)",                                   "Economic activity (T016A)",                                   "Economic activity (T016A)",                                      "Economic activity (T016A)",                                      "Economic activity (T016A)",      "Economic activity (T016A)",             "Economic activity (T016A)",          "Economic activity (T016A)",     "Economic activity (T016A)",                                    "Economic activity (T016A)",                          "Economic activity (T016A)",                        "Economic activity (T016A)",   "Economic activity (T016A)"
-"Geographic ID","Geographic Area","Total: All categories: Economic activity","Total: Economically active: Total","Economically active: Employee: Part-time","Economically active: Employee: Full-time","Economically active: Self-employed with employees: Part-time","Economically active: Self-employed with employees: Full-time","Economically active: Self-employed without employees: Part-time","Economically active: Self-employed without employees: Full-time","Economically active: Unemployed","Economically active: Full-time student","Total: Economically inactive: Total","Economically inactive: Retired","Economically inactive: Student (including full-time students)","Economically inactive: Looking after home or family","Economically inactive: Long-term sick or disabled","Economically inactive: Other"
-    "E92000001",        "England",                                "38881374",                         "27183134",                                 "5333268",                                "15016564",                                                      "148074",                                                      "715271",                                                         "990573",                                                        "1939714",                        "1702847",                               "1336823",                           "11698240",                       "5320691",                                                      "2255831",                                            "1695134",                                          "1574134",                      "852450"
-    "W92000004",          "Wales",                                 "2245166",                          "1476735",                                  "313022",                                  "799348",                                                        "7564",                                                       "42107",                                                          "43250",                                                         "101108",                          "96689",                                 "73647",                             "768431",                        "361501",                                                       "133880",                                              "86396",                                           "140760",                       "45894"
- -

Key characteristics of the CSV file are:

-
    -
  • summary information for entire table provided at beginning of file
  • -
  • multiple header lines
  • -
  • comma delimited cells
  • -
  • double quote escaping of text
  • -
- - -

- Requires: - MultipleHeadingRows and - AnnotationAndSupplementaryInfo. -

- - -

Correct interpretation of the statistics requires additional qualification or - awareness of context. To achieve this the complementary html file includes supplementary information and - annotations pertinent to the data published in the accompanying CSV file. Annotation or references may - be applied to:

-
    -
  • a group of tables
  • -
  • an entire table
  • -
  • a row
  • -
  • a column
  • -
  • an individual cell
  • -
- -

- Requires: - AnnotationAndSupplementaryInfo. -

-

Furthermore, these statistical data sets make frequent use of predefined category codes - and geographic regions. Dataset QS601EW Economic activity includes two - examples:

-
    -
  • topic category T016A; identifying the statistical measure type - in this case, - whether a person aged 16 or over was in work or looking for work in the week before the census
  • -
  • geographic area codes for 2011 Administrative Hierarchy and 2011 Westminster Parliamentary - Constituency Hierarchy
  • -
-

At present there is no standardised mechanism to associate the category codes, - provided as plain text, with their authoritative definitions.

-

- Requires: - AssociationOfCodeValuesWithExternalDefinitions. -

-

Finally, reuse of the statistical data is also inhibited by a lack of explicit definition - of the meaning of column headings.

-

- Requires: - SemanticTypeDefinition. -

-
-
-

2.3 Use Case #3 - Creation of consolidated global land surface temperature climate - databank

-

- (Contributed by Jeremy Tandy) -

-

Climate change and global warming have become one of the most pressing environmental - concerns in society today. Crucial to predicting future change is an understanding of - the world’s historical climate, with long duration instrumental records of climate being - central to that goal. Whilst there is an abundance of data recording the climate at - locations the world over, the scrutiny under which climate science is put means that much - of this data remains unused leading to a paucity of data in some regions with which to - verify our understanding of climate change.

- -

The International Surface Temperature - Initiative seeks to create a consolidated global land surface temperatures databank - as an open and freely available resource to climate scientists.

- -

To achieve this goal, climate datasets, known as “decks”, are gathered from participating - organisations and merged into a combined dataset using a scientifically peer reviewed method which assesses the data records for inclusion against a variety of - criteria.

- -

Given the need for openness and transparency in creating the databank, it is essential - that the provenance of the source data is clear. Original source data, particularly for - records captured prior to the mid-twentieth century, may be in hard-copy form. In order to - incorporate the widest possible scope of source data, the International Surface - Temperature Initiative is supported by data rescue - activities to digitise hard copy records.

- -

The data is, where possible, published in the following four stages:

-
    -
  • Stage 0: raw digital image of hard copy records or information as to hard copy - location
  • -
  • Stage 1: data in native format provided
  • -
  • Stage 2: data converted into a common format and with provenance and version control - information appended
  • -
  • Stage 3: merged collation of stage 2 data within a single consolidated dataset
  • -
- -

The Stage 1 data is typically provided in tabular form - the most common variant is - white-space delimited ASCII files. Each data deck comprises multiple files which are - packaged as a compressed tar ball (.tar.gz). Included within the compressed - tar ball package, and provided alongside, is a read-me file providing unstructured - supplementary information. Summary information is often embedded at the top of each - file.

- -

For example, see the Ugandan Stage 1 data deck (local copy) and associated readme file (local copy).

- -

The Ugandan Stage 1 data deck appears to be comprised of two discrete datasets, each - partitioned into a sub-directory within the tar ball: uganda-raw and - uganda-bestguess. Each sub-directory includes a Microsoft Word document - providing supplementary information about the provenance of the dataset; of particular - note is that uganda-raw is collated from 9 source datasets whilst - uganda-bestguess provides what is considered by the data publisher to be - the best set of values with duplicate values discarded.

- -

- Requires: - AnnotationAndSupplementaryInfo. -

- -

Dataset uganda-raw is split into 96 discrete files, each providing maximum, - minimum or mean monthly air temperature for one of the 32 weather observation stations - (sites) included in the data set. Similarly, dataset uganda-bestguess is - partitioned into discrete files; in this case just 3 files each of which provide maximum, - minimum or mean monthly air temperature data for all sites. The mapping from data file to - data sub-set is described in the Microsoft Word document.

- -

- Requires: - CsvAsSubsetOfLargerDataset. -

- -

A snippet of the data indicating maximum monthly temperature for Entebbe, Uganda, from - uganda-raw is provided below. File = 637050_ENTEBBE_tmx.txt

- -
Example 2
637050  ENTEBBE
-5
-ENTEBBE BEA     0.05    32.45   3761F
-ENTEBBE GHCNv3G 0.05    32.45   1155M
-ENTEBBE ColArchive      0.05    32.45   1155M
-ENTEBBE GSOD    0.05    32.45   1155M
-ENTEBBE NCARds512       0.05    32.755  1155M
-
-Tmax
-{snip}
-1935.04	27.83	27.80	27.80	-999.00	-999.00
-1935.12	25.72	25.70	25.70	-999.00	-999.00
-1935.21	26.44	26.40	26.40	-999.00	-999.00
-1935.29	25.72	25.70	25.70	-999.00	-999.00
-1935.37	24.61	24.60	24.60	-999.00	-999.00
-1935.46	24.33	24.30	24.30	-999.00	-999.00
-1935.54	24.89	24.90	24.90	-999.00	-999.00
-{snip}
- -

The key characteristics are:

-
    -
  • white space delimited; this is not strictly a CSV file
  • -
  • summary information pertinent to the “data rows” is included at the beginning of the - data file
  • -
  • row, column and cell value interpretation is informed by accompanying Microsoft Word - document; human intervention is required to unambiguously determine semantics, e.g. the - meaning of each column, the unit of measurement
  • -
  • the observed property is defined as “Tmax”; there is no reference to an authoritative - definition describing that property
  • -
  • there is no header line providing column names
  • -
  • the year and month (column 1) is expressed as a decimal value; e.g. 1901.04 – - equivalent to January, 1901
  • -
  • multiple temperature values (“replicates”) are provided for each row; one from each of - the sources defined in the header, e.g. BEA (British East Africa), - GHCNv3G, ColArchive, GSOD and - NCARds512
  • -
  • the provenance of specific cell values cannot be asserted; for example, data values - for 1935 observed at Entebbe are digitised from digital images published in PDF (local copy)
  • -
-

A snippet of the data indicating maximum monthly temperature for all stations in Uganda - from uganda-bestguess is provided below (truncated to 9 columns). File = ug_tmx_jrc_bg_v1.0.txt

- -
Example 3
ARUA	BOMBO	BUKALASA	BUTIABA	DWOLI	ENTEBBE AIR	FT PORTAL	GONDOKORO	[…]
-{snip}
-1935.04	-99.00	-99.00	-99.00	-99.00	-99.00	27.83	-99.00	-99.00	[…]
-1935.12	-99.00	-99.00	-99.00	-99.00	-99.00	25.72	-99.00	-99.00	[…]
-1935.21	-99.00	-99.00	-99.00	-99.00	-99.00	26.44	-99.00	-99.00	[…]
-1935.29	-99.00	-99.00	-99.00	-99.00	-99.00	25.72	-99.00	-99.00	[…]
-1935.37	-99.00	-99.00	-99.00	-99.00	-99.00	24.61	-99.00	-99.00	[…]
-1935.46	-99.00	-99.00	-99.00	-99.00	-99.00	24.33	-99.00	-99.00	[…]
-1935.54	-99.00	-99.00	-99.00	-99.00	-99.00	24.89	-99.00	-99.00	[…]
-{snip}
- -

Many of the characteristics concerning the “raw” file are exhibited here too. - Additionally, we see that:

-
    -
  • the delimiter is now tab (U+0009)
  • -
  • metadata is entirely missing from this file, requiring human intervention to combine - the filename token (tmx) with supplementary information in the accompanying - Microsoft Word document to determine the semantics
  • -
- -

At present, the global surface temperature databank comprises 25 Stage 1 data decks for - monthly temperature observations. These are provided by numerous organisations in - heterogeneous forms. In order to merge these data decks into a single combined dataset, - each data deck has to be converted into a standard form. Columns consist of: station - name, latitude, longitude, altitude, - date, maximum monthly temperature, minimum monthly - temperature, mean monthly temperature plus additional provenance - information.

- -

An example Stage 2 data file is given for Entebbe, Uganda, below. File = uganda_000000000005_monthly_stage2

- -
Example 4
{snip}
-ENTEBBE                            0.0500    32.4500  1146.35 193501XX  2783  1711  2247 301/109/101/104/999/999/999/000/000/000/102
-ENTEBBE                            0.0500    32.4500  1146.35 193502XX  2572  1772  2172 301/109/101/104/999/999/999/000/000/000/102
-ENTEBBE                            0.0500    32.4500  1146.35 193503XX  2644  1889  2267 301/109/101/104/999/999/999/000/000/000/102
-ENTEBBE                            0.0500    32.4500  1146.35 193504XX  2572  1817  2194 301/109/101/104/999/999/999/000/000/000/102
-ENTEBBE                            0.0500    32.4500  1146.35 193505XX  2461  1722  2092 301/109/101/104/999/999/999/000/000/000/102
-ENTEBBE                            0.0500    32.4500  1146.35 193506XX  2433  1706  2069 301/109/101/104/999/999/999/000/000/000/102
-ENTEBBE                            0.0500    32.4500  1146.35 193507XX  2489  1628  2058 301/109/101/104/999/999/999/000/000/000/102
-{snip}
- -

Because of the heterogeneity of the Stage 1 data decks, bespoke data processing programs - were required for each data deck consuming valuable effort and resource in simple data - pre-processing. If the semantics, structure and other supplementary metadata pertinent to - the Stage 1 data decks had been machine readable, then this data homogenisation stage - could have been avoided altogether. Data provenance is crucial to this initiative, - therefore it would be beneficial to be able to associate the supplementary metadata - without needing to edit the original data files.

- -

- Requires: - R-AssociationOfCodeValuesWithExternalDefinitions, - SyntacticTypeDefinition, - SemanticTypeDefinition, - MissingValueDefinition, - NonStandardCellDelimiter and - ZeroEditAdditionOfSupplementaryMetadata. -

- -

The data pre-processing tools created to parse each Stage 1 data deck into the standard - Stage 2 format and the merge process to create the consolidated Stage 3 data set were - written using the software most familiar to the participating scientists: Fortran 95. The - merge software source code is available online. It is worth noting that this sector of the scientific community also - commonly uses IDL and is - gradually adopting Python as the default software - language choice.

- -

The resulting merged dataset is published in several formats – including tabular text. - The GHCN-format merged dataset (available from the US National Climatic Data Center's FTP site) comprises several files: merged data and withheld - data (e.g. those data that did not meet the merge criteria) each with an associated - “inventory” file.

- -

A snippet of the inventory for merged data is provided below; each row describing one of - the 31,427 sites in the dataset. File = merged.monthly.stage3.v1.0.0-beta4.inv

- -
Example 5
{snip}
-REC41011874   0.0500  32.4500 1155.0 ENTEBBE_AIRPO
-{snip}
- -

The columns are: station identifier, latitude, - longitude, altitude (m) and station name. The - data is fixed format rather than delimited.

- -

Similarly, a snippet of the merged data itself is provided. Given that the original - .dat file is a largely unmanageable 422.6 MB in size, a subset is provided. - File = merged.monthly.stage3.v1.0.0-beta4.snip

- -
Example 6
{snip}
-REC410118741935TAVG 2245    2170    2265    2195    2090    2070    2059    2080    2145    2190    2225    2165
-REC410118741935TMAX 2780    2570    2640    2570    2460    2430    2490    2520    2620    2630    2660    2590
-REC410118741935TMIN 1710    1770    1890    1820    1720    1710    1629    1640    1670    1750    1790    1740
-{snip}
- -

The columns are: station identifier, year, quantity - kind and the quantity values for months January to December in that year. Again, - the data is fixed format rather than delimited.

- -

Here we see the station identifier REC41011874 being used as a foreign key - to refer to the observing station details; in this case Entebbe Airport. Once again, there - is no metadata provided within the file to describe how to interpret each of the data - values.

- -

- Requires: - ForeignKeyReferences. -

- -

The resulting merged dataset provides time series of how the observed climate has changed - over a long duration at approximately 32000 locations around the globe. Such instrumental - climate records provide a basis for climate research. However, it is well known that these - climate records are usually affected by inhomogeneities (artificial shifts) due to changes - in the measurement conditions (e.g. relocation, modification or recalibration - of the instrument etc.). As these artificial shifts often have the same magnitude as the - climate signal, such as long-term variations, trends or cycles, a direct analysis of the - raw time-series data can lead to wrong conclusions about climate change.

-

Statistical homogenisation procedures are used to detect and correct these artificial shifts. - Once detected, the raw time-series data is annotated to indicate the presence of artificial - shifts in the data, details of the homogenisation procedure undertaken and, where possible, - the reasons for those shifts.

- -

- Requires: - AnnotationAndSupplementaryInfo. -

- -

Future iterations of the global land surface temperatures databank are anticipated to - include quality controlled (Stage 4) and homogenised (Stage 5) datasets derived from the - merged dataset (Stage 3) outlined above.

- -
- -
-

2.4 Use Case #4 - Publication of public sector roles and salaries

-

- (Contributed by Jeni Tennison) -

-

In line with the - - G8 open data charter Principle 4: Releasing data for improved governance, the - UK Government publishes information about public sector roles and salaries.

-

The collection of this information is managed by the - Cabinet Office and subsequently - published via the UK Government data portal at data.gov.uk.

-

In order to ensure a consistent return from submitting departments and agencies, the - Cabinet Office mandated that each response conform to a data definition schema, which is described within a narrative PDF document. Each submission comprises a - pair of CSV files - one for senior roles and another for junior roles.

- -

- Requires: - GroupingOfMultipleTables, - WellFormedCsvCheck and - CsvValidation. -

- -

The submission for senior roles from the Higher Education Funding Council for England (HEFCE) is provided - below to illustrate. White space has been added for clarity. File = HEFCE_organogram_senior_data_31032011.csv

- -
Example 7
Post Unique Reference,              Name,Grade,             Job Title,                Job/Team Function,                            Parent Department,                                Organisation,                             Unit,     Contact Phone,         Contact E-mail,Reports to Senior Post,Salary Cost of Reports (£),FTE,Actual Pay Floor (£),Actual Pay Ceiling (£),,Profession,Notes,Valid?
-                90115,        Steve Egan,SCS1A,Deputy Chief Executive,  Finance and Corporate Resources,Department for Business Innovation and Skills,Higher Education Funding Council for England,  Finance and Corporate Resources,     0117 931 7408,     s.egan@hefce.ac.uk,                 90334,                   5883433,  1,              120000,                124999,,   Finance,     ,     1
-                90250,     David Sweeney,SCS1A,              Director,"Research, Innovation and Skills",Department for Business Innovation and Skills,Higher Education Funding Council for England,"Research, Innovation and Skills",     0117 931 7304, d.sweeeney@hefce.ac.uk,                 90334,                   1207171,  1,              110000,                114999,,    Policy,     ,     1
-                90284,       Heather Fry,SCS1A,              Director,      Education and Participation,Department for Business Innovation and Skills,Higher Education Funding Council for England,      Education and Participation,     0117 931 7280,      h.fry@hefce.ac.uk,                 90334,                   1645195,  1,              100000,                104999,,    Policy,     ,     1
-                90334,Sir Alan Langlands, SCS4,       Chief Executive,                  Chief Executive,Department for Business Innovation and Skills,Higher Education Funding Council for England,                            HEFCE,0117 931 7300/7341,a.langlands@hefce.ac.uk,                    xx,                         0,  1,              230000,                234999,,    Policy,     ,     1
- -

Similarly, a snippet of the junior role submission from HEFCE is provided. Again, - white space has been added for clarity. File = HEFCE_organogram_junior_data_31032011.csv

- -
Example 8
.                           Parent Department,                                Organisation,                           Unit,Reporting Senior Post,Grade,Payscale Minimum (£),Payscale Maximum (£),Generic Job Title,Number of Posts in FTE,          Profession
-Department for Business Innovation and Skills,Higher Education Funding Council for England,    Education and Participation,                90284,    4,               17426,               20002,    Administrator,                     2,Operational Delivery
-Department for Business Innovation and Skills,Higher Education Funding Council for England,    Education and Participation,                90284,    5,               19546,               22478,    Administrator,                     1,Operational Delivery
-Department for Business Innovation and Skills,Higher Education Funding Council for England,Finance and Corporate Resources,                90115,    4,               17426,               20002,    Administrator,                  8.67,Operational Delivery
-Department for Business Innovation and Skills,Higher Education Funding Council for England,Finance and Corporate Resources,                90115,    5,               19546,               22478,    Administrator,                   0.5,Operational Delivery
-{snip}        
- -

Key characteristics of the CSV files are:

-
    -
  • single header line
  • -
  • comma delimited cells
  • -
  • double quote escaping of text cells including the delimiter character (comma)
  • -
- -

Within the senior role CSV the cell Post Unique Reference provides - a primary key within the data file for each row. In addition, it provides a - unique identifier for the entity described within a given row. In order for the - entity to be referenced from outside this datafile, the local identifier - must be mapped to a globally unique identifier such as a URI.

- -

- Requires: - PrimaryKey and - URIMapping. -

- -

This unique identifier is referenced both from within the senior post dataset, - Reports to Senior Post, and within the junior post dataset, Reporting - Senior Post in order to determine the relationships within the organisational - structure.

- -

- Requires: - ForeignKeyReferences. -

- -

For the most senior role in a given organisation, the Reports to Senior Post - cell is expressed as xx denoting that this post does not report to anyone - within the organisation.

- -

- Requires: - MissingValueDefinition. -

- -

The public sector roles and salaries information is published at - data.gov.uk using an interactive "Organogram Viewer" widget implemented using javascript. - The HEFCE data can be visualized - - here. For convenience, a screenshot is provided in - Fig. 1 Screenshot of Organogram Viewer web application showing HEFCE data.

- -
- data.gov.uk-roles-and-salaries-browser.png -
Fig. 1 Screenshot of Organogram Viewer web application showing HEFCE data
-
- -

In order to create this visualization, each pair of tabular datasets were transformed - into RDF and uploaded into a triple store exposing a SPARQL end-point which the - interactive widget then queries to acquire the necessary data. An example of the derived RDF - is provided in file - HEFCE_organogram_31032011.rdf.

- -

The transformation from CSV to RDF required bespoke software, supplementing the content - in the CSV files with additional information such as the RDF properties for each column. - The need to create and maintain bespoke software incurs costs that may be avoided through - use of a generic CSV-to-RDF transformation mechanism.

- -

- Requires: - CsvToRdfTransformation. -

-
- -
-

2.5 Use Case #5 - Publication of property transaction data

-

- (Contributed by Andy Seaborne) -

-

The Land Registry is the - government department with responsibility to register the ownership of land and property - within England and Wales. Once land or property is entered to the Land Register - any ownership changes, mortgages or leases affecting that land or property are recorded.

- -

Their - Price paid data, dating from 1995 and consisting of more than 18.5 million records, - tracks the residential property sales in England and Wales that are lodged for registration. - This dataset is one of the most reliable sources of house price information in England and Wales.

- -

Residential property transaction details are extracted from a data warehouse system - and collated into a tabular dataset for each month. - The current monthly dataset is available online in both .txt and - .csv formats. Snippets of data for January 2014 are provided below. White space - has been added for clarity.

- -

- pp-monthly-update.txt (local copy)

-
Example 9
{C6428808-DC2A-4CE7-8576-0000303EF81B},137000,2013-12-13 00:00, "B67 5HE","T","N","F","130","",       "WIGORN ROAD",       "",   "SMETHWICK",            "SANDWELL",       "WEST MIDLANDS","A"
-{16748E59-A596-48A0-B034-00007533B0C1}, 99950,2014-01-03 00:00, "PE3 8QR","T","N","F", "11","",             "RISBY","BRETTON","PETERBOROUGH","CITY OF PETERBOROUGH","CITY OF PETERBOROUGH","A"
-{F10C5B50-92DD-4A69-B7F1-0000C3899733},355000,2013-12-19 00:00,"BH24 1SW","D","N","F", "55","","NORTH POULNER ROAD",       "",    "RINGWOOD",          "NEW FOREST",           "HAMPSHIRE","A"
-{snip}
- -

- pp-monthly-update-new-version.csv (local copy)

-
Example 10
"{C6428808-DC2A-4CE7-8576-0000303EF81B}","137000","2013-12-13 00:00", "B67 5HE","T","N","F","130","",       "WIGORN ROAD",       "",   "SMETHWICK",            "SANDWELL",       "WEST MIDLANDS","A"
-"{16748E59-A596-48A0-B034-00007533B0C1}", "99950","2014-01-03 00:00", "PE3 8QR","T","N","F", "11","",             "RISBY","BRETTON","PETERBOROUGH","CITY OF PETERBOROUGH","CITY OF PETERBOROUGH","A"
-"{F10C5B50-92DD-4A69-B7F1-0000C3899733}","355000","2013-12-19 00:00","BH24 1SW","D","N","F", "55","","NORTH POULNER ROAD",       "",    "RINGWOOD",          "NEW FOREST",           "HAMPSHIRE","A"
-{snip}
- -

There seems to be little difference between the two formats with the exception that all - cells within the .csv file are escaped with a pair of double quotes ("").

- -

The header row is absent. Information regarding the meaning of each column and the - abbreviations used within the dataset are provided in a complementary - FAQ document. - The column headings are provided below along with some supplemental detail:

- -
    -
  1. Transaction unique identifier
  2. -
  3. Price - sale price stated on the Transfer deed
  4. -
  5. Date of Transfer - date when the sale was completed, as stated on the Transfer deed
  6. -
  7. Postcode
  8. -
  9. Property Type - D (detached), S (semi-detached), - T (terraced), F (flats/maisonettes)
  10. -
  11. Old/New - Y (newly built property) and - N (established residential building)
  12. -
  13. Duration - relates to tenure; F (freehold) and L (leasehold)
  14. -
  15. PAON - Primary Addressable Object Name
  16. -
  17. SAON - Secondary Addressable Object Name
  18. -
  19. Street
  20. -
  21. Locality
  22. -
  23. Town/City
  24. -
  25. Local Authority
  26. -
  27. County
  28. -
  29. Record status - indicates status of the transaction; A - (addition of a new transaction), C (correction of an existing transaction) - and D (deleted transaction)
  30. -
- -

- Requires: - AnnotationAndSupplementaryInfo. -

- -

Each row, or record, within the tabular dataset describes a property transaction. The - Transaction unique identifier column provides a unique identifier for that - property transaction. Given that transactions may be amended, this identifier cannot - be treated as a primary key for rows within the dataset as the identifier may occur - more than once. In order for the - property transaction to be referenced from outside this dataset, the local identifier - must be mapped to a globally unique identifier such as a URI.

- -

- Requires: - URIMapping. -

- -

Each transaction record makes use of predefined category codes as outlined above; e.g. - Duration may be F (freehold) or L (leasehold). Furthermore, - geographic descriptors are commonly used. Whilst there is no attempt to - link these descriptors to specific geographic identifiers, such a linkage is likely - to provide additional utility when aggregating transaction data by location or region for further - analysis. At present there is no standardised mechanism to associate the category codes, - provided as plain text, or geographic identifiers with their authoritative definitions.

- -

- Requires: - AssociationOfCodeValuesWithExternalDefinitions. -

- -

The collated monthly transaction dataset is used as the basis for updating the Land Registry's - information systems; in this case the data is persisted as RDF triples within a triple store. - A SPARQL end-point - and accompanying data definitions are provided - by the Land Registry allowing users to query the content of the triple store.

- -

In order to update the triple store, the monthly transaction dataset is converted into RDF. The - value of the Record status cell for a given row informs the update process: add, update or - delete. Bespoke software has been created by the Land Registry to perform the transformation from CSV to RDF. - The transformation requires supplementary information not present in the CSV, such as the RDF - properties for each column specified in the - data definitions. The need to create and maintain bespoke software incurs costs that may - be avoided through use of a generic CSV-to-RDF transformation mechanism.

- -

- Requires: - CsvToRdfTransformation. -

- -
Note

The monthly transaction dataset contains in the order of 100,000 records; - any transformation will need to scale accordingly.

- -

In parallel to providing access via the - SPARQL end-point, the Land Registry also provides aggregated sets of transaction data. Data is - available as a single file containing all transactions since 1995, or partitioned by year. - Given that the complete dataset is approaching 3GB in size, the annual partitions provide a - far more manageable method to download the property transaction data. However, each annual - partition is only a subset of the complete dataset. It is important to be able to both make - assertions about the complete dataset (e.g. publication date, license etc.) and to be - able to understand how an annual partition relates to the complete dataset and other partitions.

- -

- Requires: - CsvAsSubsetOfLargerDataset. -

- -
-
-

2.6 Use Case #6 - Journal Article Solr Search Results

-

- (Contributed by Alf Eaton) -

-

When performing literature searches researchers need to retain a persisted collection of - journal articles of interest in a local database compiled from on-line publication websites. - In this use case a researcher wants to retain a local personal journal article publication - database based on the search results from Public Library - of Science. PLOS One is a nonprofit open access scientific publishing project aimed at creating - a library of open access journals and other scientific literature under an open content license. -

-

- In general this use case also illustrates the utility of CSV as a convenient exchange format for pushing - tabular data between software components: -

-
    -
  • making it easier to interpret the data on subsequent ingest -
  • -
  • being able to work with manageable chunks of a tabular data set (e.g. only subsets of the tabular dataset - are ever materialised in a single CSV file, and we often want to know how that subset fits within the larger - whole). -
  • -
- -

The PLOS website features a Solr index search - engine (Live Search) which can return query results in - XML, - JSON - or in a more concise CSV format. - The output from the CSV Live Search is illustrated below: -

-
Example 11
id,doi,publication_date,title_display,author
-10.1371/journal.pone.0095131,10.1371/journal.pone.0095131,2014-06-05T00:00:00Z,"Genotyping of French <i>Bacillus anthracis</i> Strains Based on 31-Loci Multi Locus VNTR Analysis: Epidemiology, Marker Evaluation, and Update of the Internet Genotype Database","Simon Thierry,Christophe Tourterel,Philippe Le Flèche,Sylviane Derzelle,Neira Dekhil,Christiane Mendy,Cécile Colaneri,Gilles Vergnaud,Nora Madani"
-10.1371/journal.pone.0095156,10.1371/journal.pone.0095156,2014-06-05T00:00:00Z,Pathways Mediating the Interaction between Endothelial Progenitor Cells (EPCs) and Platelets,"Oshrat Raz,Dorit L Lev,Alexander Battler,Eli I Lev"
-10.1371/journal.pone.0095275,10.1371/journal.pone.0095275,2014-06-05T00:00:00Z,Identification of Divergent Protein Domains by Combining HMM-HMM Comparisons and Co-Occurrence Detection,"Amel Ghouila,Isabelle Florent,Fatma Zahra Guerfali,Nicolas Terrapon,Dhafer Laouini,Sadok Ben Yahia,Olivier Gascuel,Laurent Bréhélin"
-10.1371/journal.pone.0096098,10.1371/journal.pone.0096098,2014-06-05T00:00:00Z,Baseline CD4 Cell Counts of Newly Diagnosed HIV Cases in China: 2006–2012,"Houlin Tang,Yurong Mao,Cynthia X Shi,Jing Han,Liyan Wang,Juan Xu,Qianqian Qin,Roger Detels,Zunyou Wu"
-10.1371/journal.pone.0097475,10.1371/journal.pone.0097475,2014-06-05T00:00:00Z,Crystal Structure of the Open State of the <i>Neisseria gonorrhoeae</i> MtrE Outer Membrane Channel,"Hsiang-Ting Lei,Tsung-Han Chou,Chih-Chia Su,Jani Reddy Bolla,Nitin Kumar,Abhijith Radhakrishnan,Feng Long,Jared A Delmar,Sylvia V Do,Kanagalaghatta R Rajashankar,William M Shafer,Edward W Yu"
-

Versions of the search results provided at time of writing are available locally in - XML, - JSON and - CSV formats for reference.

- -

A significant difference between the CSV formatted results and those of JSON - and XML is the absence of information about how the set of results provided in the HTTP response fit within - the complete set of results that match the Live Search request. The information provided - in the JSON and XML search results states both the total number of "hits" for the Live - Search request and the start index within the complete set (zero for the example provided - here as the ?start={offset} query parameter is absent from the request).

- -
Note
-

Other common methods of splitting up large datasets into manageable chunks include - partitioning by time (e.g. all the records added to a dataset in a given day may be - exported in a CSV file). Such partitioning allows regular updates to be shared. However, - in order to recombine those time-based partitions into the complete set, one needs to know - the datetime range for which that dataset partition is valid. Such information should be - available within a CSV metadata description.

-
- -

- Requires: - CsvAsSubsetOfLargerDataset. -

- -

- To be useful to a user maintaining a local publication database, PLOS One search results need to be returned in an organized and - consistent tabular format. This includes:

-
    -
  • mapping search criteria cells to columns returned in the search results -
  • -
  • ordering the columns to match the order of the search criteria cells. -
  • -
-

Lastly because the researcher may use different search criteria the header row plays an important role - later for the researcher wanting to combine multiple literature searches into their database. - The researcher will use the header column names returned in the first row as a way to identify - each column type. -

-

- Requires: - WellFormedCsvCheck and - CsvValidation. -

-

Search results returned in a tabular format can contain cell values that are organized in data structures - also known as micro formats. In the example above the publication_date and authors list represent two - micro formats that are represented in a recognizable pattern that can be parsed by software or - by the human reader. In the case of the author column, microformats provide the advantage of being - able to store a single author's name or multiple authors' names separated by a comma delimiter. - Because each author cell value is surrounded by quotes a parser can choose to ignore the - data structure or address it.

-

Furthermore, note that the values of the title_display column contain markup. Whilst - these values may be treated as pure text, it provides an example of how structure or - syntax may be embedded within a cell.

-

- Requires: - CellMicrosyntax and - RepeatedProperties. -

- -
- -
-

2.7 Use Case #7 - Reliability Analyses of Police Open Data

-

- (Contributed by Davide Ceolin) -

-

Several Web sources expose datasets about UK crime statistics. - These datasets vary in format (e.g. maps vs. CSV files), timeliness, aggregation level, etc. - Before being published on the Web, these data are processed to preserve the privacy of the people - involved, but again the processing policy varies from source to source.

-

Every month, the UK Police Home Office publishes (via data.police.uk) CSV files that report crime - counts, aggregated on geographical basis (per address or police neighbourhood) and on type basis. - Before publishing, data are smoothed, that is, grouped in predefined areas and assigned to the - mid point of each area. Each area has to contain a minimum number of physical addresses. The goal - of this procedure is to prevent the reconstruction of the identity of the people involved in the - crimes.

-

Over time, the policies adopted for preprocessing these data have changed, but data previously - published have not been recomputed. Therefore, datasets about different months present relevant - differences in terms of crime types reported and geographical aggregation (e.g. initially, each - geographical area for aggregation had to include at least 12 physical addresses. Later, this - limit was lowered to 8).

-

These policies introduce a controlled error in the data for privacy reasons, but these changes - in the policies imply the fact that different datasets adhere differently to the real data, i.e. - they present different reliability levels. Previous work provided two procedures for measuring - and comparing the reliability of the datasets, but in order to automate and improve these procedures, - it is crucial to understand the meaning of the columns, the relationships between columns, and how the - data rows have been computed.

-

For instance, here is a snippet from a dataset about crimes that happened in Hampshire in April 2012:

-
Example 12
Month,	Force,			Neighbourhood,	Burglary,	Robbery,	Vehicle crime,	Violent crime,	Anti-social behaviour,	Other crime
-{snip}
-2011-04	Hampshire Constabulary,	2LE11,		2,		0,		1,		6,		14,			6
-2011-04	Hampshire Constabulary,	2LE10,		1,		0,		2,		4,		15,			6
-2011-04	Hampshire Constabulary,	2LE12,		3,		0,		0,		4,		25,			21
-{snip}
-

and that dataset reports 248 entries, while in October 2012, the crime types we can see are increased to 11:

-
Example 13
Month,	Force,			Neighbourhood,	Burglary,	Robbery,	Vehicle crime,	Violent crime,	Anti-social behaviour,	Criminal damage and arson,	Shoplifting,	Other theft,	Drugs,	Public disorder and weapons,	Other crime
-{snip}
-2012-10,Hampshire Constabulary,	2LE11,		1,		0,		1,		2,		8,			0,				0,		1,		1,	0,				1
-2012-10,Hampshire Constabulary,	1SY01,		9,		1,		12,		8,		87,			17,				12,		14,		13,	7,				4
-2012-10,Hampshire Constabulary,	1SY02,		11,		0,		11,		20,		144,			39,				2,		12,		9,	8,				5
-  {snip}
-

This dataset reports 232 entries.

- -

In order to properly handle the columns, - it is crucial to understand the type of the data contained therein. Given the context, knowing - this information would reveal an important part of the column meaning (e.g. to identify dates). -

-

- Requires: - SyntacticTypeDefinition. -

-

- Also, it is important to understand the precise semantics of each column. - This is relevant for two reasons. First, to identify relations between columns (e.g. some crime types - are siblings, while other are less semantically related). Second, to identify semantic relations between - columns in heterogeneous datasets (e.g. a column in one dataset may correspond to the sum of two or more - columns in others). -

-

- Requires: - SemanticTypeDefinition. -

-

Lastly, datasets with different row numbers are the result of different smoothing procedures. Therefore, it would - be important to trace and access their provenance, in order to facilitate their comparison.

-

- Requires: - AnnotationAndSupplementaryInfo. -

-
-
-

2.8 Use Case #8 - Analyzing Scientific Spreadsheets

-

- (Contributed by Alf Eaton, Davide Ceolin, Martine de Vos) -

-

A paper published in Nature Immunology in December 2012 compared changes in expression of a range of genes in response to treatment with two different cytokines. The results were published in the paper as graphic figures, and the raw data was presented in the form of supplementary spreadsheets, as Excel files (local copy).

- -

Having at disposal both the paper and the results, a scientist may wish to reproduce the experiment, check if the results he obtains coincide with those published, and compare those results with others, provided by different studies about the same issues.

- -

Because of the size of the datasets and of the complexity of the computations, it could be necessary to perform such analyses and comparisons by means of properly defined software, typically by means of an R, Python or Matlab script. Such software would require as input the data contained in the Excel file. However, it would be difficult to write a parser to extract the information, for the reasons described below.

- -

To clarify the issues related to the spreadsheet parsing and analysis, we first present an example extrapolated from it. The example below shows a CSV encoding of the original Excel spreadsheet converted using Microsoft Excel 2007. White space has been added to aid clarity. (file = ni.2449-S3.csv)

- -
Example 14
Supplementary Table 2. Genes more potently regulated by IL-15,,,,,,,,,,,,,,,,,,
-            ,         ,     ,       ,         ,        ,          ,       ,         ,        ,          ,           ,         ,        ,          ,       ,         ,        ,
-   gene_name,   symbol, RPKM,       ,         ,        ,          ,       ,         ,        ,          ,Fold Change,         ,        ,          ,       ,         ,        ,
-            ,         ,     , 4 hour,         ,        ,          ,24 hour,         ,        ,          ,     4 hour,         ,        ,          ,24 hour,         ,        ,
-            ,         , Cont,IL2_1nM,IL2_500nM,IL15_1nM,IL15_500nM,IL2_1nM,IL2_500nM,IL15_1nM,IL15_500nM,    IL2_1nM,IL2_500nM,IL15_1nM,IL15_500nM,IL2_1nM,IL2_500nM,IL15_1nM,IL15_500nM
-NM_001033122,     Cd69,15.67,  46.63,   216.01,   30.71,    445.58,   9.21,    77.32,    4.56,     77.21,       2.98,    13.78,    1.96,     28.44,   0.59,     4.93,    0.29,      4.93
-   NM_026618,   Ccdc56, 9.07,  12.55,     9.25,    5.88,     14.33,  20.08,    20.91,   11.97,     22.69,       1.38,     1.02,    0.65,      1.58,   2.21,     2.31,    1.32,      2.50
-   NM_008637,    Nudt1, 9.31,   7.51,     8.60,   11.21,      6.84,  15.85,    25.14,    7.56,     22.77,       0.81,     0.92,    1.20,      0.73,   1.70,     2.70,    0.81,      2.45
-   NM_008638,   Mthfd2,58.67,  33.99,   245.87,   44.66,    167.87,  55.62,   204.50,   24.52,    176.51,       0.58,     4.19,    0.76,      2.86,   0.95,     3.49,    0.42,      3.01
-   NM_178185,Hist1h2ao, 7.13,  16.52,     7.82,    7.79,     16.99,  75.04,   290.72,   21.99,    164.93,       2.32,     1.10,    1.09,      2.38,  10.52,    40.78,    3.08,     23.13
-{snip}
- -

As we can see from the example, the table contains several columns of data that are measurements of gene expression in cells after treatment with two concentrations of two cytokines, measured after two periods of time, presented as both actual values and fold change. This can be represented in a table, but needs 3 levels of headings and several merged cells. In fact, the first row is the title of the table, the second to fourth rows are the table headers.

- -

We also see that the first column gene_name provides a unique identifier for the gene described in each row, with the second column symbol providing a - human readable notation for each gene - albeit a scientific human! It is necessary to determine which column, if any, provides the unique identifier for the entity which - each row describes. In order for the gene to be referenced from outside the datafile, e.g. to reconcile the information in this table with other information about the gene, the local identifier must be mapped to a globally unique identifier such as a URI.

- -

- Requires: - MultipleHeadingRows and - URIMapping. -

- -

The first column contains a GenBank identifier for each gene, with the column name "gene_name". The GenBank identifier provides a local identifier for each gene. This local identifier, e.g. “NM_008638”, can be converted to a fully qualified URI by adding a URI prefix, e.g. “http://www.ncbi.nlm.nih.gov/nuccore/NM_008638” allowing the gene to be uniquely and unambiguously identified.

- -

The second column contains the standard symbol for each gene, labelled as "symbol". These appear to be HUGO gene nomenclature symbols, but as there's no mapping it's hard to be sure which namespace these symbols are from.

- -

- Requires: - URIMapping. -

- -

As this spreadsheet was published as supplemental data for a journal article, there is little description of what the columns represent, even as text. There is a column labelled as "Cont", which has no description anywhere, but is presumably the background level of expression for each gene.

- -

- Requires: - SyntacticTypeDefinition and - SemanticTypeDefinition. -

- -

Half of the cells represent measurements, but the details of what those measurements are can only be found in the article text. The other half of the cells represent the change in expression over the background level. It is difficult to tell the difference without annotation that describes the relationship between the cells (or understanding of the nested headings). In this particular spreadsheet, only the values are published, and not the formulae that were used to calculate the derived values. The units of each cell are "expression levels relative to the expression level of a constant gene, Rpl7", described in the text of the methods section of the full article.

- -

- Requires: - UnitMeasureDefinition. -

- -

The heading rows contain details of the treatment that each cell received, e.g. "4 hour, IL2_1nM". It would be useful to be able to make this machine readable (i.e. to represent treatment with 1nM IL-2 for 4 hours).

- -

All the details of the experiment (which cells were used, how they were treated, when they were measured) are described in the methods section of the article. To be able to compare data between multiple experiments, a parser would also need to be able to understand all these parameters that may have affected the outcome of the experiment.

- -

- Requires: - AnnotationAndSupplementaryInfo. -

-
-
-

2.9 Use Case #9 - Chemical Imaging

-

- (Contributed by Mathew Thomas) -

-

Chemical imaging experimental work makes use of CSV formats to record its measurements. In this use case two examples are shown to depict scans from a mass spectrometer and corresponding FTIR corrected files that are saved into a CSV format automatically.

-

Mass Spectrometric Imaging (MSI) allows the generation of 2D ion density maps that help visualize molecules present in sections of tissues and cells. The combination of spatial resolution and mass resolution results in very large and complex data sets. The following is generated using the software Decon Tools, a tool to de-isotope MS spectra and to detect features from MS data using isotopic signatures of expected compounds, available freely at omins.pnnl.gov. The raw files generated by the mass spec instrument are read in and the processed output files are saved as CSV files for each line.

-

Fourier transform (FTIR) spectroscopy is a measurement technique whereby spectra are collected based on measurements of the coherence of a radiative source, using time-domain or space-domain measurements of the electromagnetic radiation or other type of radiation.

-

In general this use case also illustrates the utility of CSV as a means for scientists to collect and process their experimental results:

-
    -
  • making it easier for data to be loaded into a spreadsheet to examine results
  • -
  • being able to edit or select a portion of results to be plotted
  • -
  • making it possible to combine all scans to examine full 2D composite image.
  • -
-

The key characteristics are:

-
    -
  • CSV uses a fixed number of cells
  • -
  • First row provides header cell tags, although the FTIR header begins with a comma
  • -
  • All values are comma separated, but they can be delimited by tabs as well.
  • -
  • Because the data is being collected from an instrument some of the columns represent measurement values taken during the experiment.
  • -
  • Left column is typically regarded as the row primary key.
  • -
-

- Requires: - WellFormedCsvCheck, - CsvValidation , - PrimaryKey and - UnitMeasureDefinition. -

-

Lastly, for Mass Spectrometry multiple CSV files need to be examined to view the sample image in its entirety.

-

- Requires: - CsvAsSubsetOfLargerDataset . -

-

Below are Mass Spectrometry instrument measurements (3 of 316 CSV rows) for a single line on a sample. It gives the mass-to-charge ranges, peak values, acquisition times and total ion current.

-
Example 15
scan_num,scan_time,type,bpi,bpi_mz,tic,num_peaks,num_deisotoped,info
-1,0,1,4.45E+07,576.27308,1.06E+09,132,0,FTMS + p NSI Full ms [100.00-2000.00]
-2,0.075,1,1.26E+08,576.27306,2.32E+09,86,0,FTMS + p NSI Full ms [100.00-2000.00]
-3,0.1475,1,9.53E+07,576.27328,1.66E+09,102,0,FTMS + p NSI Full ms [100.00-2000.00]
-

Below is an example of FTIR data. The files from the instrument are baseline corrected, normalized and saved as CSV files automatically. Column 1 represents the wavelength # or range and the remaining columns represent different formations like bound eps (extracellular polymeric substance), loose eps, shewanella etc. Below is an example (5 of 3161 rows):

-
Example 16
,wt beps,wt laeps,so16533 beps,so167333 laeps,so31 beps,so313375 lAPS,so3176345 bEPS,so313376 laEPS,so3193331 bEPS,so3191444 laeps,so3195553beps,so31933333 laeps
- 1999.82,-0.0681585,-0.04114415,-0.001671781,0.000589855,0.027188073,0.018877371,-0.066532177,-0.016899697,-0.077690018,0.001594551,-0.086573831,-0.08155035
- 1998.855,-0.0678255,-0.0409804,-0.001622611,0.000552989,0.027188073,0.01890847,-0.066132737,-0.016857071,-0.077346835,0.001733207,-0.086115107,-0.081042424
- 1997.89,-0.067603,-0.0410459,-0.001647196,0.000423958,0.027238845,0.018955119,-0.065904461,-0.016750515,-0.077101756,0.001733207,-0.085656382,-0.080590934
- 1996.925,-0.0673255,-0.04114415,-0.001647196,0.000258061,0.027289616,0.018970669,-0.065790412,-0.01664396,-0.076856677,0.001629215,-0.085281062,-0.080365189
-
- -
-

2.10 Use Case #10 - OpenSpending Data

-

- (Contributed by Stasinos Konstantopoulos) -

-

The OpenSpending and the Budgit platforms - provide plenty of useful datasets providing figures of national budget and spending of several countries. A journalist willing to investigate - about public spending fallacies can use these data as a basis for his research, and possibly compare them against different sources. - Similarly, a politician that is interested in developing new policies for development can, for instance, combine these data with those from the - World Bank to identify correlations and, possibly, dependencies to leverage. -

-

- Nevertheless, these uses of these datasets are possibly undermined by the following obstacles. -

-
    -
  • There are whole collections of datasets where a single currency is implied for all amounts given. See, for example, how all - Slovenian Budget Datasets implicitly give amounts in Euros. Given that Slovenia joined the Eurozone in 2007, - the currency has changed relatively recently. How do we know if a given table expresses currency amounts in “tolar” or “Euro”?

    -

    In order to be able to compare and combine these data with those provided by other sources like the - World Bank, - in an automatic manner, it would be necessary to explicitly define the currency of each column. Given that - the currency will be uniform for a specific table, the currency metadata may be indicated once for the entire table.

    -

    - Requires: - UnitMeasureDefinition. -

  • -
  • Similar issues are also in the Uganda Budget and Aid to Uganda, 2003-2007 file, - where there are four columns related to the amount. Of these, "amount" (Ugandan Shillings implied) - and "amount_dollars" (USD implied) are mandatory. The value of these columns is implicit, and moreover, as explained in the - complementary information, the Ugandan Shillings amount is computed by converting - the Dollars amount using a ratio determined on year basis (e.g. 2003/4: 1 USD = 1.847 UGX). Since this ratio varies on year basis, - and still corresponds to an approximation of the yearly value of the exchange rate, in order to properly use these data, it would - be preferable to know how these were obtained, or where to find such information. - -

    - Requires: - AssociationOfCodeValuesWithExternalDefinitions and - AnnotationAndSupplementaryInfo. -

    -
  • -
  • - Again in the Uganda Budget and Aid to Uganda, 2003-2007 file, if a row represents a - donation, then the values for the "amount_donor" (the amount in the donor's original currency) and "donorcurrency" (the donor's currency name) columns of - that row are reported. - Otherwise, the corresponding values are set to "0", to indicate that the row does not represent a donation and that the only relevant amounts for that row - are reported in the "amount" and "amount_dollars" column. To make these files machine-understandable, it is necessary to make this coding explicit. -

    - Requires: - MissingValueDefinition. -

    -
  • -
-
Note

The datahub.io platform that collects both OpenSpending and Budgit data allows publishing data in Simple Data Format (SDF), RDF - and other formats providing explicit semantics. Nevertheless, the datasets mentioned above present either implicit semantics and/or additional metadata files provided only - as attachment.

-
- - - -
-

2.11 Use Case #11 - City of Palo Alto Tree Data

-

- (Contributed by Eric Stephan) -

-

The City of Palo Alto, - California Urban Forest Section is responsible for maintaining and tracking - the city's public trees and urban forest. In a W3C Data on the Web Best Practices (DWBP) use case - discussion with Jonathan Reichental City of Palo Alto CIO, he brought to the working group's attention - a Tree Inventory maintained by the city in a - spreadsheet - form using Google Fusion. This use case represents use of tabular data to be representative of - geophysical tree locations - also provided in Google Map form where the user can point and click on trees to look up row - information about the tree. -

-

The example below illustrates the first few rows of data:

-
Example 17
GID,Private,Tree ID,Admin Area,Side of Street,On Street,From Street,To Street,Street_Name,Situs Number,Address Estimated,Lot Side,Serial Number,Tree Site,Species,Trim Cycle,Diameter at Breast Ht,Trunk Count,Height Code,Canopy Width,Trunk Condition,Structure Condition,Crown Condition,Pest Condition,Condition Calced,Condition Rating,Vigor,Cable Presence,Stake Presence,Grow Space,Utility Presence,Distance from Property,Inventory Date,Staff Name,Comments,Zip,City Name,Longitude,Latitude,Protected,Designated,Heritage,Appraised Value,Hardscape,Identifier,Location Feature ID,Install Date,Feature Name,KML,FusionMarkerIcon
-1,True,29,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,203,,Front,,2,Celtis australis,Large Tree Routine Prune,11,1,25-30,15-30,,Good,5,,,Good,2,False,False,Planting Strip,,44,10/18/2010,BK,,,Palo Alto,-122.1565172,37.4409561,False,False,False,,None,40,13872,,"Tree: 29 site 2 at 203 ADDISON AV, on ADDISON AV 44 from pl","<Point><coordinates>-122.156485,37.440963</coordinates></Point>",small_green
-2,True,30,,,EMERSON ST,CHANNING AV,ADDISON AV,ADDISON AV,203,,Left,,1,Liquidambar styraciflua,Large Tree Routine Prune,11,1,50-55,15-30,Good,Good,5,,,Good,2,False,False,Planting Strip,,21,6/2/2010,BK,,,Palo Alto,-122.1567812,37.440951,False,False,False,,None,41,13872,,"Tree: 30 site 1 at 203 ADDISON AV, on EMERSON ST 21 from pl","<Point><coordinates>-122.156749,37.440958</coordinates></Point>",small_green
-3,True,31,,,EMERSON ST,CHANNING AV,ADDISON AV,ADDISON AV,203,,Left,,2,Liquidambar styraciflua,Large Tree Routine Prune,11,1,40-45,15-30,Good,Good,5,,,Good,2,False,False,Planting Strip,,54,6/2/2010,BK,,,Palo Alto,-122.1566921,37.4408948,False,False,False,,Low,42,13872,,"Tree: 31 site 2 at 203 ADDISON AV, on EMERSON ST 54 from pl","<Point><coordinates>-122.156659,37.440902</coordinates></Point>",small_green
-4,True,32,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,209,,Front,,1,Ulmus parvifolia,Large Tree Routine Prune,18,1,35-40,30-45,Good,Good,5,,,Good,2,False,False,Planting Strip,,21,6/2/2010,BK,,,Palo Alto,-122.1564595,37.4410143,False,False,False,,Medium,43,13873,,"Tree: 32 site 1 at 209 ADDISON AV, on ADDISON AV 21 from pl","<Point><coordinates>-122.156427,37.441022</coordinates></Point>",small_green
-5,True,33,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,219,,Front,,1,Eriobotrya japonica,Large Tree Routine Prune,7,1,15-20,0-15,Good,Good,3,,,Good,1,False,False,Planting Strip,,16,6/1/2010,BK,,,Palo Alto,-122.1563676,37.441107,False,False,False,,None,44,13874,,"Tree: 33 site 1 at 219 ADDISON AV, on ADDISON AV 16 from pl","<Point><coordinates>-122.156335,37.441114</coordinates></Point>",small_green
-6,True,34,,,ADDISON AV,EMERSON ST,RAMONA ST,ADDISON AV,219,,Front,,2,Robinia pseudoacacia,Large Tree Routine Prune,29,1,50-55,30-45,Poor,Poor,5,,,Good,2,False,False,Planting Strip,,33,6/1/2010,BK,cavity or decay; trunk decay; codominant leaders; included bark; large leader or limb decay; previous failure root damage; root decay;  beware of BEES.,,Palo Alto,-122.1563313,37.4411436,False,False,False,,None,45,13874,,"Tree: 34 site 2 at 219 ADDISON AV, on ADDISON AV 33 from pl","<Point><coordinates>-122.156299,37.441151</coordinates></Point>",small_green
-{snip}
-

The complete CSV file of Palo Alto tree data is available locally - - but please note that it is approximately 18MB in size.

-

- Google Fusion allows a user to download the tree data either from a filtered view or the entire spreadsheet. - The exported spreadsheet is in an organized and consistent tabular format. This includes:

-
    -
  • mapping spreadsheet cells to columns in the CSV file. -
  • -
  • ordering the CSV columns to match the order of the spreadsheet columns. -
  • -
  • The CSV file provides a primary key for each row (column GID), a unique identifier - for each tree (column Tree ID), accounts for missing data, and lists characteristics - describing the condition of the tree in the comments cell using a micro syntax to delimit the - characteristics list. The spreadsheet also provides geo coordinate information pinpointing each - inventoried tree. -
  • -
-

In order for information about a given tree to be reconciled with information about the same - tree originating from other sources, the local identifier for that tree must be mapped to a - globally unique identifier such as a URI.

-

Also note that in row 6, a series of statements describing the condition of the tree and other - important information are provided in the comments cell. These statements are - delimited using the semi-colon ";" character.

-

- Requires: - WellFormedCsvCheck, - CsvValidation, - PrimaryKey, - URIMapping, - MissingValueDefinition, - UnitMeasureDefinition, - CellMicrosyntax and - RepeatedProperties. -

-
- -
-

2.12 Use Case #12 - Chemical Structures

-

- (Contributed by Eric Stephan) -

-

The purpose of this use case is to illustrate how 3-D molecular structures such as the Protein Data Bank and XYZ formats are conveyed in tabular formats. These files can be archived to be used in informatics analysis or as part of an input deck to be used in experimental simulation. Scientific communities rely heavily on tabular formats such as these to conduct their research and share each other's results in platform independent formats.

- -

The Protein Data Bank (pdb) file format is a tabular file describing the three dimensional structures of molecules held in the Protein Data Bank. The pdb format accordingly provides for description and annotation of protein and nucleic acid structures including atomic coordinates, observed sidechain rotamers, secondary structure assignments, as well as atomic connectivity.

- -

The XYZ file format is a chemical file format. There is no formal standard and several variations exist, but a typical XYZ format specifies the molecule geometry by giving the number of atoms with Cartesian coordinates that will be read on the first line, a comment on the second, and the lines of atomic coordinates in the following lines.

- -

In general this use case also illustrates the utility of CSV as a means for scientists to collect and process their experimental results:

-
    -
  • making it easier for data to be loaded into a spreadsheet to examine results
  • -
  • being able to edit or select a portion of results to be plotted
  • -
  • making it possible to combine all scans to examine full 2D composite image.
  • -
- -

The key characteristics of the XYZ format are:

-
    -
  • CSV contains two header rows, the first row containing the number of atoms in molecule (number of rows in data block). The second is a comment line.
  • -
  • Each row in the data block uses a fixed number of cells (atom name followed by x, y, z coordinates).
  • -
  • All values are delimited by spaces.
  • -
- -

- Requires: - WellFormedCsvCheck, - CsvValidation, - MultipleHeadingRows and - UnitMeasureDefinition. -

- -

Below is a Methane molecular structure organized in an XYZ format.

- -
Example 18
5
-methane molecule (in angstroms)
-C        0.000000        0.000000        0.000000
-H        0.000000        0.000000        1.089000
-H        1.026719        0.000000       -0.363000
-H       -0.513360       -0.889165       -0.363000
-H       -0.513360        0.889165       -0.363000
- -

The key characteristics of the PDB format are:

-
    -
  • Each PDB record is self describing and contains different ways to document each protein.
  • -
  • Each row of the file uses a token to depict the purpose of that row.
  • -
  • Tabular data rows vary from a fixed number of columns (e.g. ATOM) to a non-fixed number of columns (e.g. SEQRES) that specify the number of columns in the row.
  • -
  • Because the PDB is a fully contained self describing record it also provides multiple tables to annotate the record. Each table appears to be delimited by a line in the file "...".
  • -
- -

- Requires: - GroupingOfMultipleTables. -

- -

Below is an example PDB file:

- -
Example 19
HEADER    EXTRACELLULAR MATRIX                    22-JAN-98   1A3I
-TITLE     X-RAY CRYSTALLOGRAPHIC DETERMINATION OF A COLLAGEN-LIKE
-TITLE    2 PEPTIDE WITH THE REPEATING SEQUENCE (PRO-PRO-GLY)
-...
-EXPDTA    X-RAY DIFFRACTION
-AUTHOR    R.Z.KRAMER,L.VITAGLIANO,J.BELLA,R.BERISIO,L.MAZZARELLA,
-AUTHOR   2 B.BRODSKY,A.ZAGARI,H.M.BERMAN
-...
-REMARK 350 BIOMOLECULE: 1
-REMARK 350 APPLY THE FOLLOWING TO CHAINS: A, B, C
-REMARK 350   BIOMT1   1  1.000000  0.000000  0.000000        0.00000
-REMARK 350   BIOMT2   1  0.000000  1.000000  0.000000        0.00000
-...
-SEQRES   1 A    9  PRO PRO GLY PRO PRO GLY PRO PRO GLY
-SEQRES   1 B    6  PRO PRO GLY PRO PRO GLY
-SEQRES   1 C    6  PRO PRO GLY PRO PRO GLY
-...
-ATOM      1  N   PRO A   1       8.316  21.206  21.530  1.00 17.44           N
-ATOM      2  CA  PRO A   1       7.608  20.729  20.336  1.00 17.44           C
-ATOM      3  C   PRO A   1       8.487  20.707  19.092  1.00 17.44           C
-ATOM      4  O   PRO A   1       9.466  21.457  19.005  1.00 17.44           O
-ATOM      5  CB  PRO A   1       6.460  21.723  20.211  1.00 22.26           C
-...
-HETATM  130  C   ACY   401       3.682  22.541  11.236  1.00 21.19           C
-HETATM  131  O   ACY   401       2.807  23.097  10.553  1.00 21.19           O
-HETATM  132  OXT ACY   401       4.306  23.101  12.291  1.00 21.19           O
-
- -
-

2.13 Use Case #13 - Representing Entities and Facts Extracted From Text

-

- (Contributed by Tim Finin) -

- -

The US National Institute of Standards and Technology (NIST) has run various conferences on extracting information from text centered around challenge problems. Participants submit the output of their systems on an evaluation dataset to NIST for scoring, typically in the form of tab-separated format.

- -

The 2013 NIST Cold Start Knowledge Base Population Task, for example, asks participants to extract facts from text and to represent these as triples along with associated metadata that include provenance and certainty values. A line in the submission format consists of a triple (subject-predicate-object) and, for some predicates, provenance information. Provenance includes a document ID and, depending on the predicate, one or three pairs of string offsets within the document. For predicates that are relations, an optional second set of provenance values can be provided. Each line can also have an optional float as a final column to represent a certainty measure.

- -

The following lines show examples of possible triples of varying length. In the second line, D00124 is the ID of a document and the strings like 283-286 refer to strings in a document using the offsets of the first and last characters. The final floating point value on some lines is the optional certainty value.

- -
Example 20
{snip}
-:e4 type         PER
-:e4 mention      "Bart"  D00124 283-286
-:e4 mention      "JoJo"  D00124 145-149 0.9
-:e4 per:siblings :e7     D00124 283-286 173-179 274-281
-:e4 per:age      "10"    D00124 180-181 173-179 182-191 0.9
-:e4 per:parent   :e9     D00124 180-181 381-380 399-406 D00101 220-225 230-233 201-210
-{snip}
- -

The submission format does not require that each line have the same number of columns. The expected provenance information for a triple depends on the predicate. For example, “type” typically has no provenance, “mention” has a document ID and offset pair, and domain predicates like “per:age” have one or two provenance records each of which has a document ID and three offset pairs.

- -

The file format exemplified above raises a number of issues, described as follows. Each row is intended to describe an entity (e.g. the subject of the triple, “:e4”). The unique identifier for that entity is provided in the first column. In order for information about this entity to be reconciled with information from other sources about the same entity, the local identifier needs to be mapped to a globally unique identifier such as a URI.

- -

- Requires: - URIMapping. -

- -

After each triple, there is a variable number of annotations representing the provenance of the triple and, occasionally, its certainty. This information has to be properly identified and managed.

- -

- Requires: - AnnotationAndSupplementaryInfo. -

- -

Entities “:e4”, “:e7” and “:e9” appear to be (foreign key) references to other entities described in this or in external tables. Likewise, also the identifiers “D00124” and “D00101” are ambiguous identifiers. It would be useful to identify the resources that these references represent.

- -

Moreover, “per” appears to be a term from a controlled vocabulary. How do we know which controlled vocabulary it is a member of and what its authoritative definition is?

- -

- Requires: - ForeignKeyReferences, - AssociationOfCodeValuesWithExternalDefinitions and - SemanticTypeDefinition. -

- -

The identifiers used for the entities (“:e4”, “:e7” and “:e9”), as well as those used for the predicates (e.g. “type”, “mention”, “per:siblings” etc.), are ambiguous local identifiers. How can one make the identifier an unambiguous URI? A similar requirement regards the provenance annotations. These are composed of a document (e.g. “D00124”) and page number ranges (e.g. “180-181”). Page number ranges are clearly valid only in the context of the preceding document identifier. The interesting assertion about provenance is the reference (document plus page range). Thus we might want to give the reference a unique identifier composed of the document ID and page range (e.g. D00124#180-181).

- -

- Requires: - URIMapping. -

- -

Besides the entities, the table presents also some values. Some of these are strings (e.g. “10”, “Bart”), some of them are probably floating point values (e.g. “0.9”). It would be useful to have an explicit syntactic type definition for these values.

- -

- Requires: - SyntacticTypeDefinition. -

- -

Entity “:e4” is the subject of many rows, meaning that many rows can be combined to make a composite set of statements about this entity.

- -

Moreover, a single row in the table comprises a triple (subject-predicate-object), one or more provenance references and an optional certainty measure. The provenance references have been normalised for compactness (e.g. so they fit on a single row). However, each provenance statement has the same target triple so one could unbundle the composite row into multiple simple statements that have a regular number of columns (see the two equivalent examples below).

- -
Example 21
{snip}
-:e4 per:age      "10"    D00124 180-181 173-179 182-191 0.9
-:e4 per:parent   :e9     D00124 180-181 381-380 399-406 D00101 220-225 230-233 201-210
-{snip}
-
Example 22
{snip}
-:e4 per:age      "10"    D00124 180-181 0.9
-:e4 per:age      "10"    D00124 173-179 0.9
-:e4 per:age      "10"    D00124 182-191 0.9
-:e4 per:parent   :e9     D00124 180-181
-:e4 per:parent   :e9     D00124 381-380
-:e4 per:parent   :e9     D00124 399-406
-:e4 per:parent   :e9     D00101 220-225
-:e4 per:parent   :e9     D00101 230-233
-:e4 per:parent   :e9     D00101 201-210
-{snip}
- -

- Requires: - TableNormalization. -

- -

Lastly, since we already observed that rows comprise triples, that there is a frequent reference to externally defined vocabularies, that values are defined as text (literals), and that triples are also composed by entities, for which we aim to obtain a URI (as described above), it may be useful to be able to convert such a table into RDF.

- -

- Requires: - CsvToRdfTransformation. -

-
- -
-

2.14 Use Case #14 - Displaying Locations of Care Homes on a Map

-

- (Contributed by Jeni Tennison) -

- -

NHS Choices makes available a number of (what it calls) CSV files for different aspects of NHS data on its website at http://www.nhs.uk/aboutnhschoices/contactus/pages/freedom-of-information.aspx

- -

One of the files (file = SCL.csv) contains information about the locations of care homes, as illustrated in the example below:

- -
Example 23
OrganisationID¬OrganisationCode¬OrganisationType¬SubType¬OrganisationStatus¬IsPimsManaged¬OrganisationName¬Address1¬Address2¬Address3¬City¬County¬Postcode¬Latitude¬Longitude¬ParentODSCode¬ParentName¬Phone¬Email¬Website¬Fax¬LocalAuthority
-220153¬1-303541019¬Care homes and care at home¬UNKNOWN¬Visible¬False¬Bournville House¬Furnace Lane¬Lightmoor Village¬¬Telford¬Shropshire¬TF4 3BY¬0¬0¬1-101653596¬Accord Housing Association Limited¬01952739284¬¬www.accordha.org.uk¬01952588949¬
-220154¬1-378873485¬Care homes and care at home¬UNKNOWN¬Visible¬True¬Ashcroft¬Milestone House¬Wicklewood¬¬Wymondham¬Norfolk¬NR18 9QL¬52.577003479003906¬1.0523598194122314¬1-377665735¬Julian Support Limited¬01953 607340¬ashcroftresidential@juliansupport.org¬http://www.juliansupport.org¬01953 607365¬
-220155¬1-409848410¬Care homes and care at home¬UNKNOWN¬Visible¬False¬Quorndon Care Limited¬34 Bakewell Road¬¬¬Loughborough¬Leicestershire¬LE11 5QY¬52.785675048828125¬-1.219469428062439¬1-101678101¬Quorndon Care Limited¬01509219024¬¬www.quorndoncare.co.uk¬01509413940¬
-{snip}
- -

The file has two interesting syntactic features:

-
    -
  • the cell separator is the not sign (¬, \u00AC) rather than a comma
  • -
  • no cells are wrapped in double quotes; some cells contain (unescaped) double quotes
  • -
- -

- Requires: - WellFormedCsvCheck, - SyntacticTypeDefinition and - NonStandardCellDelimiter. -

- -

Our user wants to be able to embed a map of these locations easily into her web page using a web component, such that she can use markup like:

- -
	<emap src="http://media.nhschoices.nhs.uk/data/foi/SCL.csv" latcol="Latitude" longcol="Longitude">
-        
- -

and see a map similar to that shown at https://github.com/JeniT/nhs-choices/blob/master/SCP.geojson, without converting the CSV file into GeoJSON.

- -

To make the web component easy to define, there should be a native API on to the data in the CSV file within the browser.

- -

- Requires: - CsvToJsonTransformation. -

- -
- -
-

2.15 Use Case #15 - Intelligently Previewing CSV files

-

- (Contributed by Jeni Tennison) -

-

- All of the data repositories based on the CKAN software, such - as data.gov.uk, data.gov, and many - others, use JSON as the representation of the data when providing a preview of CSV data within a browser. - Server side pre-processing of the CSV files is performed to try and determine column - types, clean the data and transform the CSV-encoded data to JSON in order to provide the preview. JSON has many - features which make it ideal for delivering a preview of the data, originally in CSV format, - to the browser. -

-

- Javascript is a hard dependency for interacting with data in the browser and as such - JSON was used as the serialization format because it was the most appropriate format for - delivering those data. As the object notation for Javascript JSON is natively understood - by Javascript it is therefore possible to use the data without any external dependencies. - The values in the data delivered map directly to common Javascript types and libraries for - processing and generating JSON, with appropriate type conversion, are widely available for - many programming languages. -

-

- Beyond basic knowledge of how to work with JSON, there is no further burden on the user - to understand complex semantics around how the data should be interpreted. The user of the - data can be assured that the data is correctly encoded as UTF-8 and it is easily queryable - using common patterns used in everyday Javascript. None of the encoding and - serialization flaws with CSV are apparent, although badly structured CSV files will be - mirrored in the JSON. -

-

- Requires: - WellFormedCsvCheck and - CsvToJsonTransformation. -

-

When providing the in-browser previews of CSV-formatted data, the utility of the preview application - is limited because the server-side processing of the CSV is not always able to determine - the data types (e.g. date-time) associated with data columns. As a result it is not possible - for the in-browser preview to offer functions such as sorting rows by date.

- -

As an example, see the - Spend over £25,000 in The Royal Wolverhampton Hospitals NHS Trust example. - Note that the underlying data begins with:

-
Example 24
"Expenditure over £25,000- Payment made in January 2014",,,,,,,,
-,,,,,,,,
-Department Family,Entity,Date,Expense Type,Expense Area,Supplier,Transaction Number,Amount in Sterling,
-Department of Health,The Royal Wolverhampton Hospitals NHS Trust RL4,31/01/2014,Capital Project,Capital,STRYKER UK LTD,0001337928,31896.06,
-Department of Health,The Royal Wolverhampton Hospitals NHS Trust RL4,17/01/2014,SERVICE AGREEMENTS,Pathology,ABBOTT LABORATORIES LTD,0001335058,77775.13,
-...
-

A local copy of this dataset is available: file = mth-10-january-2014.csv

-

The header line here comes below an empty row, and there is metadata about the table in the row above the empty row. The preview code manages to - identify the headers from the CSV, and displays the metadata as the value in the first cell of the first row.

-

- Requires: - MultipleHeadingRows and - AnnotationAndSupplementaryInfo. -

-

It would be good if the preview could recognise that the Date column contains a date and that the Amount in Sterling column contains a number, - so that it could offer options to filter/sort these by date/numerically.

-

- Requires: - SemanticTypeDefinition, - SyntacticTypeDefinition and - UnitMeasureDefinition. -

-

Moreover, some of the values reported may refer to external definitions (from dictionaries or other sources). It would be useful to know where it is - possible to find such resources, to be able to properly handle and visualize the data, by linking to them.

-

- Requires: - AssociationOfCodeValuesWithExternalDefinitions. -

-

Lastly, the web page where the CSV is published presents also useful metadata about it. It would be useful to be able to know and access these metadata - even though they are not included in the file.

-

These include:

-
    -
  • Resource title
  • -
  • Publisher
  • -
  • License
  • -
  • Abstract / description
  • -
  • Date last updated
  • -
-

- Requires: - AnnotationAndSupplementaryInfo. -

- -
-
-

2.16 Use Case #16 - Tabular Representations of NetCDF data Using CDL Syntax

-

- (Contributed by Eric Stephan) -

-

NetCDF is a set of binary data formats, programming interfaces, and software libraries that help read and write scientific data files. - NetCDF provides scientists a means to share measured or simulated experiments with one another across the web. What makes - NetCDF useful is its ability to be self describing and provide a means for scientists to rely on existing data model - as opposed to needing to write their own. The classic NetCDF data model consists of variables, dimensions, and attributes. - This way of thinking about data was introduced with the very first NetCDF release, and is still the core of all NetCDF files. -

-

Among the tools available to the NetCDF community are two tools: ncdump and ncgen. The ncdump tool is used - by scientists wanting to inspect variables and attributes (metadata) contained in the NetCDF file. It also - can provide a full text extraction of data including blocks of tabular data represented by variables. - While NetCDF files are typically written by a software client, it is possible to generate NetCDF files using - ncgen and ncgen3 from a text format. The ncgen tool parses the text file and stores it in a binary format. -

-

Both ncdump and ncgen rely on a text format to represent the NetCDF file called network Common Data form - Language (CDL). The CDL syntax as shown below contains annotation along with blocks of data denoted by the - "data:" key. For the results to be legible for visual inspection the measurement data is written as delimited - blocks of scalar values. As shown in the example below CDL supports multiple variables or blocks of data. - The blocks of data while delimited need to be thought of as a vector or single column of tabular data - wrapped around to the next line in a similar way that characters can be wrapped around in a single cell block - of a spreadsheet to make the spreadsheet more visually appealing to the user. -

-
Example 25
netcdf foo {    // example NetCDF specification in CDL
-
-dimensions:
-lat = 10, lon = 5, time = unlimited;
-
-variables:
-  int     lat(lat), lon(lon), time(time);
-  float   z(time,lat,lon), t(time,lat,lon);
-  double  p(time,lat,lon);
-  int     rh(time,lat,lon);
-
-  lat:units = "degrees_north";
-  lon:units = "degrees_east";
-  time:units = "seconds";
-  z:units = "meters";
-  z:valid_range = 0., 5000.;
-  p:_FillValue = -9999.;
-  rh:_FillValue = -1;
-
-data:
-  lat   = 0, 10, 20, 30, 40, 50, 60, 70, 80, 90;
-  lon   = -140, -118, -96, -84, -52;
-}
- -

- The next example shows a small subset of data block taken from an actual NetCDF file. - The blocks of data while delimited need to be thought of as a vector or single column - of tabular data wrapped around to the next line in a similar way that characters can be - wrapped around in a single cell block of a spreadsheet to make the spreadsheet more - visually appealing to the user. -

-
Example 26
data:
-
- base_time = 1020770640 ;
-
- time_offset = 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
-    34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68,
-    70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102,
-    104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130,
-    132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158,
-    160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186,
-    188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214,
-    216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242,
-    244, 246, 248, 250, 252, 254, 256, 258, 260, 262, 264, 266, 268, 270,
-    272, 274, 276, 278, 280, 282, 284, 286, 288, 290, 292, 294, 296, 298,
-    300, 302, 304, 306, 308, 310, 312, 314, 316, 318, 320, 322, 324, 326,
-    328, 330, 332, 334, 336, 338, 340, 342, 344, 346, 348, 350, 352, 354,
-    356, 358, 360, 362, 364, 366, 368, 370, 372, 374, 376, 378, 380, 382,
-    384, 386, 388, 390, 392, 394, 396, 398, 400, 402, 404, 406, 408, 410,
-    412, 414, 416, 418, 420, 422, 424, 426, 428, 430, 432, 434, 436, 438,
-    440, 442, 444, 446, 448, 450, 452, 454, 456, 458, 460, 462, 464, 466,
-    468, 470, 472, 474, 476, 478, 480, 482, 484, 486, 488, 490, 492, 494,
-    496, 498, 500, 502, 504, 506, 508, 510, 512, 514, 516, 518, 520, 522;
- -

- The format allows for error codes and missing values to be included. -

-

- Requires: - WellFormedCsvCheck, - CsvValidation, - UnitMeasureDefinition, - MissingValueDefinition and - GroupingOfMultipleTables. -

-

- Lastly, NetCDF files are typically collected together in larger datasets - where they can be analyzed, so the CSV data can be thought of as a subset - of a larger dataset. -

- Requires: - CsvAsSubsetOfLargerDataset and - AnnotationAndSupplementaryInfo. -

- - -
-
-

2.17 Use Case #17 - Canonical mapping of CSV

-

- (Contributed by David Booth and Jeremy Tandy) -

- -

CSV is by far the commonest format within which open data is published, and is thus - typical of the data that application developers need to work with.

-

However, an object / object graph serialisation (of open data) is easier to consume within - software applications. For example, web applications (using HTML5 & Javascript) require - no extra libraries to work with data in JSON format. Similarly, RDF-encoded data from - multiple sources can be simply combined or merged using SPARQL queries once persisted - within a triple store.

- -

The - UK Government policy paper "Open Data: unleashing the potential" outlines a - - set of principles for publishing open data. Within this document, principle 9 states:

- -

- Release data quickly, and then work to make sure that it is available in open standard - formats, including linked data formats. -

- -

The open data principles recognise how the additional utility to be gained from publishing in - linked data formats must be balanced against the additional effort incurred by the - data publisher to do so and the resulting delay to publication of the data. Data publishers - are required to release data quickly - which means making the data available in - a format convenient for them such as CSV dumps from databases or spread sheets.

- -

One of the hindrances to publishing in linked data formats is the difficulty in - determining the ontology or vocabulary (e.g. the classes, predicates, namespaces and - other usage patterns) that should be used to describe the data. Whilst it is - only reasonable to assume that a data publisher best knows the intended meaning of their - data, they cannot be expected to determine the ontology or vocabulary most applicable - to a consuming application!

- -

Furthermore, in lieu of agreed de facto standard vocabularies or ontologies for a given - application domain, it is highly likely that disparate applications will conform to different data - models. How should the data publisher choose which of the available vocabularies or - ontologies to use when publishing (if indeed they are aware of those applications at all)!

- -

In order to assist data publishers provide data in linked data formats without - the need to determine ontologies or vocabularies, it is necessary to separate the syntactic - mapping (e.g. changing format from CSV to JSON) from the semantic mapping - (e.g. defining the transformations required to achieve semantic alignment with a target - data model).

- -

As a result of such separation, it will be possible to establish a canonical - transformation from CSV conforming to the - core tabular data model [tabular-data-model] - to an object graph serialisation such as JSON.

- -

- Requires: - WellFormedCsvCheck, - CsvToJsonTransformation and - CanonicalMappingInLieuOfAnnotation. -

- -

This use case assumes that JSON is the target serialisation for application developers - given the general utility of that format. However, by considering JSON-LD [json-ld], it becomes - trivial to map CSV-encoded tabular data via JSON into a canonical RDF model. In doing so - this enables CSV-encoded tabular data to be published in linked data formats - as required in the open data principle 9 at no extra effort to the data publisher as - standard mechanisms are available for a data user to transform the data from CSV to RDF.

- -

- Requires: - CsvToRdfTransformation. -

- -

In addition, open data principle 14 requires that:

- -

- Public bodies should publish relevant metadata about their datasets […]; and they - should publish supporting descriptions of the format, provenance and meaning of the data. -

- -

To achieve this, data publishers need to be able to publish supplementary metadata concerning - their tabular datasets, such as title, usage license and description.

- -

- Requires: - AnnotationAndSupplementaryInfo. -

- -

Applications may automatically determine the data type (e.g. date-time, number) associated - with cells in a CSV file by parsing the data values. However, on occasion, this is prone to - mistakes where data appears to resemble something else. This is especially - prevalent for dates. For example, 1/4 is often confused with 1 April - rather than 0.25. In such situations, it is beneficial if guidance can be given to the - transformation process indicating the data type for given columns.

- -

- Requires: - SyntacticTypeDefinition. -

- -

Provision of CSV data coupled with a canonical mapping provides significant utility by itself. However, - there is nothing stopping a data publisher from adding annotation defining data semantics once, say, - an appropriate de facto standard vocabulary has been agreed within the community of use. Similarly, a - data consumer may wish to work directly with the canonical mapping and wish to ignore any semantic - annotations provided by the publisher.

- -
-
-

2.18 Use Case #18 - Supporting Semantic-based Recommendations

-

- (Contributed by Davide Ceolin and Valentina Maccatrozzo) -

-

In the ESWC-14 Challenge: Linked Open Data-enabled Recommender Systems, - participants are provided with a series of datasets about books in TSV format.

-

A first dataset contains a set of user identifiers and their ratings for a bunch of books each. Each book is represented by means of a numeric identifier.

-
Example 27
DBbook_userID,	DBbook_itemID,	rate
-{snip}
-6873,		5950,		1
-6873,		8010,		1
-6873,		5232,		1
-{snip}
-

Ratings can be boolean (0,1) or Likert scale values (from 1 to 5), depending on the challenge task considered.

-

- Requires: - SyntacticTypeDefinition, - SemanticTypeDefinition and - NonStandardCellDelimiter. -

- -

A second file provides a mapping between book ids and their names and dbpedia URIs:

- -
Example 28
DBbook_ItemID	name				DBpedia_uri
-{snip}
-1		Dragonfly in Amber		http://dbpedia.org/resource/Dragonfly_in_Amber
-10		Unicorn Variations		http://dbpedia.org/resource/Unicorn_Variations
-100		A Stranger in the Mirror	http://dbpedia.org/resource/A_Stranger_in_the_Mirror
-1000		At All Costs			http://dbpedia.org/resource/At_All_Costs
-{snip}
-

- Requires: - ForeignKeyReferences. -

- -

Participants are requested to estimate the ratings or relevance scores (depending on the task) that users would - attribute to a set of books reported in an evaluation dataset:

- -
Example 29
DBbook_userID	DBbook_itemID
-{snip}
-6873		5946
-6873		5229
-6873		3151
-{snip}
-

- Requires: - R-AssociationOfCodeValuesWithExternalDefinitions. -

-

The challenge mandates the use of Linked Open Data resources in the recommendations.

-

An effective manner to satisfy this requirement is to make use of undirected semantic paths. - An undirected semantic path is a sequence of entities (subject or object) and properties that link two items, for instance:

- -
	{Book1 property1 Object1 property2 Book2}
-	
- -

This sequence results from considering the triples (subject-predicate-object) in a given Linked Open Data resource (e.g. DBpedia), - independently of their direction, such that the starting and the ending entities are the desired items and that the subject (or object) of - a triple is the object (or subject) of the following triple. - For example, the sequence above may result from the following triples:

- -
	Book1 property1 Object1
-	Book2 property1 Object1
-	
- -

- Undirected semantic paths are classified according to their length. Given a fixed length, one can extract all the undirected semantic paths of that length - that link two items within a Linked Open Data resource by running a set of SPARQL queries. - This is necessary because an undirected semantic path actually corresponds to the union of a set of directed semantic paths. In the source, data are stored - in terms of directed triples (subject-predicate-object). -

-

- The number of queries that is necessary to run in order to obtain all the undirected semantic paths that link two items is exponential in the - length of the path itself (2^n). Because of the complexity of this task and of the possible latency times deriving from - it, it might be useful to cache these results. -

- -

CSV is a good candidate for caching undirected semantic paths, because of its ease of use, sharing, reuse. However, there are some open issues - related to this. - First, since paths may present a variable number of components, one might want to represent paths in a single cell, - while being able to separate the path elements when necessary. -

-

For example, in this file, undirected semantic paths are grouped by means - of double quotes, and path components are separated by commas. The starting and ending elements of the undirected semantic paths (Book1 and Book2) are represented - in two separate columns by means of the book identifiers used in the challenge (see the example below). -

- -
Example 30
Book1	Book2	Path
-{snip}
-1	7680	"http://dbpedia.org/ontology/language,http://dbpedia.org/resource/English_language,http://dbpedia.org/ontology/language"
-1	2	"http://dbpedia.org/ontology/author,http://dbpedia.org/resource/Diana_Gabaldon,http://dbpedia.org/ontology/author"
-1	2	"http://dbpedia.org/ontology/country,http://dbpedia.org/resource/United_States,http://dbpedia.org/ontology/country"
-{snip}
-

- Requires: - CellMicrosyntax and - RepeatedProperties. -

-

- Second, the size of these caching files may be remarkable. For example, the size of this file described above is ~2GB, and that may imply prohibitive - loading times, especially when making a limited number of recommendations.

-

Since rows are sorted according to the starting and the ending book of the undirected semantic path, then all the undirected semantic paths that link two books - are present in a region of the table formed by consecutive rows.

-

By having at our disposal an annotation of such regions indicating which book they describe, one might be able to select the "slice" of - the file he needs to make a recommendation, without having to load it entirely.

-

- Requires: - AnnotationAndSupplementaryInfo and - RandomAccess. -

-
-
-

2.19 Use Case #19 - Supporting Right to Left (RTL) Directionality

-

- (Contributed by Yakov Shafranovich) -

-

Writing systems affect the way in which information is displayed. In some cases, these writing - systems affect the order in which characters are displayed. Latin based languages display text - left-to-right across a page (LTR). Languages such as Arabic and Hebrew are written in scripts - whose dominant direction is right to left (RTL) when displayed, however when it involves - non-native text or numbers it is actually bidirectional.

-

Irrespective of the LTR or RTL display of characters in a given language, data is serialised - such that the bytes are ordered in one sequential order.

- -

Content published in Hebrew and Arabic provide examples of RTL display behaviour.

- -
Note

Tabular data originating from countries where vertical writing is the norm - (e.g. China, Japan) appear to be published with rows and columns as defined in [RFC4180] (e.g. - each horizontal line in the data file conveys a row of data, with the first line optionally - providing a header with column names). Rows are published in the left to right topology.

- -

The results from the Egyptian Referendum of 2012 - illustrate the problem, as can be seen in Fig. 2 Snippet of web page displaying Egyptian Referendum results (2012).

- -
- egypt-referendum-2012-result-web-page-snip.PNG -
Fig. 2 Snippet of web page displaying Egyptian Referendum results (2012)
-
- -

The content in the - CSV data file - is serialised in the order as illustrated below (assuming LTR rendering):

- -
Example 31

-‌ا‌ل‌م‌ح‌ا‌ف‌ظ‌ة‌,‌ن‌س‌ب‌ة‌ ‌م‌و‌ا‌ف‌ق‌,‌ن‌س‌ب‌ة‌ ‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌,‌ع‌د‌د‌ ‌ا‌ل‌ن‌ا‌خ‌ب‌ي‌ن‌,‌ا‌ل‌أ‌ص‌و‌ا‌ت‌ ‌ا‌ل‌ص‌ح‌ي‌ح‌ة‌,‌ا‌ل‌أ‌ص‌و‌ا‌ت‌ ‌ا‌ل‌ب‌ا‌ط‌ل‌ة‌,‌ن‌س‌ب‌ة‌ ‌ا‌ل‌م‌ش‌ا‌ر‌ك‌ة‌,‌م‌و‌ا‌ف‌ق‌,‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌
-‌ا‌ل‌ق‌ل‌ي‌و‌ب‌ي‌ة‌,60.0,40.0,"2,639,808","853,125","15,224",32.9,"512,055","341,070"
-‌ا‌ل‌ج‌ي‌ز‌ة‌,66.7,33.3,"4,383,701","1,493,092","24,105",34.6,"995,417","497,675"
-‌ا‌ل‌ق‌ا‌ه‌ر‌ة‌,43.2,56.8,"6,580,478","2,254,698","36,342",34.8,"974,371","1,280,327"
-‌ق‌ن‌ا‌,84.5,15.5,"1,629,713","364,509","6,743",22.8,"307,839","56,670"
-{snip}
-
- -

A copy of the referendum results data file is also available locally.

- -
Note

- Readers should be aware that both the right-to-left text direction and the cursive nature of Arabic text - has been explicitly overridden in the example above in order to display each individual character in - sequential left-to-right order. -

- -

The directionality of the content as displayed does not affect the logical structure of the tabular data; - i.e. the cell at index zero is followed by the cell at index 1, and then index 2 etc.

- -

However, without awareness of the directionality of the content, an application may display data in - a way that is unintuitive for an RTL reader. For example, viewing the CSV file using - Libre Office Calc (tested - using version 3 configured with English (UK) locale) demonstrates the challenge in rendering the content correctly. - Fig. 3 CSV data file containing Egyptian Referendum results (2012) displayed in Libre Office Calc shows how the - content is - incorrectly rendered; cells progress from left-to-right yet, on the positive side, the Arabic text - within a given field runs from right-to-left. Similar behaviour is observed in Microsoft Office Excel 2007.

- -
- egypt-referendum-2012-result-csv-in-libre-office-3.png -
Fig. 3 CSV data file containing Egyptian Referendum results (2012) displayed in Libre Office Calc
-
- -

By contrast, we can see Fig. 4 CSV data file containing Egyptian Referendum results (2012) displayed in TextWrangler. The simple - TextWrangler text editor is not aware that the overall - direction is right-to-left, but does apply the Unicode bidirectional algorithm such that lines starting with an - Arabic character have a direction base of right-to-left. However, as a result, the numeric digits are also displayed - right to left, which is incorrect.

- -
- egypt-referendum-2012-result-csv-in-textwrangler.png -
Fig. 4 CSV data file containing Egyptian Referendum results (2012) displayed in TextWrangler
-
- -

It is clear that a mechanism needs to be provided such that one can explicitly declare the directionality - which applies when parsing and rendering the content of CSV files.

- -
Note
-

From Unicode version 6.3 onwards, the Unicode Standard contains new control codes (RLI, LRI, FSI, PDI) to enable authors to express isolation at the same time as direction in inline bidirectional text. The Unicode Consortium recommends that isolation be used as the default for all future inline bidirectional text embeddings. To use these new control codes, however, it will be necessary to wait until the browsers support them. The new control codes are:

-
    -
  • RLI (RIGHT-TO-LEFT ISOLATE) U+2067 to set direction right-to-left
  • -
  • LRI (LEFT-TO-RIGHT ISOLATE) U+2066 to set direction left-to-right
  • -
  • FSI (FIRST STRONG ISOLATE) U+2068 to set direction according to the first strong character
  • -
  • PDI (POP DIRECTIONAL ISOLATE) U+2069 to terminate the range set by RLI, LRI or FSI
  • -
-

More information on setting the directionality of text without markup can be found here

-
- -

Requires: - RightToLeftCsvDeclaration. -

- -
- - -
-

2.20 Use Case #20 - Integrating components with the TIBCO Spotfire platform using tabular data

-

- (Contributed by Yakov Shafranovich) -

-

A systems integrator seeks to integrate a new component into the - TIBCO Spotfire analytics platform. - Reviewing the documentation that describes how to extend the platform indicates - that Spotfire employs a common tabular file format for all products: the - Spotfire Text Data Format - (STDF).

-

The example from the STDF documentation (below) illustrates a number of the key - differences with the standard CSV format defined in [RFC4180].

-
Example 32
<bom>\! filetype=Spotfire.DataFormat.Text; version=1.0;
-\* ich bin ein berliner
-Column A;Column #14B;Kolonn Ö;The n:th column;
-Real;String;Blob;Date;
--123.45;i think there\r\nshall never be;\#aaXzD;2004-06-18;
-1.0E-14;a poem\r\nlovely as a tree;\#ADB12=;\?lost in time;
-222.2;\?invalid text;\?;2004-06-19;
-\?error11;\\förstår ej\\;\#aXzCV==;\?1979;
-3.14;hej å hå\seller?;\?NIL;\?#ERROR;
- -
    -
  • -

    The first line of the STDF file includes a - byte order mark (BOM), the - character sequence "\!" and metadata about the file type and version - to inform consuming applications.

    -

    Requires: - AnnotationAndSupplementaryInfo.

    -
  • -
  • -

    The second line is a comment line which is ignored during processing. - The comment is recognised from the initial sequence of characters within the line: - "\*".

    -

    Requires: - CommentLines.

    -
  • -
  • -

    Lines three and four provide metadata: column heading names and the data types - (including integer, real, string, date, time, datetime and blob) for each column - respectively.

    -

    Requires: - MultipleHeadingRows and - SyntacticTypeDefinition.

    -
  • -
  • -

    Cells are delimited using the semi-colon ";" character.

    -

    Requires: - NonStandardCellDelimiter.

    -
  • -
  • -

    Date and time values are strictly formatted; YYYY-MM-DD and - HH:MM:SS respectively.

    -

    Requires: - CellMicrosyntax.

    -
  • -
  • -

    Base64-encoded binary values may be included. These are designated by setting - the initial cell value to "\#".

    -
  • -
  • -

    A number of escape sequences for special characters are supported; e.g. - "\\" (backslash within a string), - "\s" (semicolon within a string - not a cell or list item delimiter), - "\n" (newline within a string) and - "\t" (tab within a string) etc.

    -

    These special characters don't affect the parsing of the data but are further - examples of the use of microsyntax within cells.

    -

    Requires: - CellMicrosyntax.

    -
  • -
  • -

    Null and invalid values are indicated by setting the initial character sequence of a cell to "\?". Optionally, an error code or other informative statement may follow.

    -

    - Requires: - MissingValueDefinition and - CellMicrosyntax. -

    -
  • -
- -

Although not shown in this example, STDF also supports list types:

-
    -
  • A valid list value must begin with "\[" and end with "\]" followed by a terminating semicolon.
  • -
  • All list items are terminated by a semicolon, including the last item in a list.
  • -
- -

- Requires: - CellMicrosyntax. -

-
- -
-

2.21 Use Case #21 - Publication of Biodiversity Information from GBIF using the Darwin Core Archive Standard

-

- (Contributed by Tim Robertson, GBIF, and Jeremy Tandy) -

- -

A citizen scientist investigating biodiversity in the Parque Nacional de Sierra Nevada, - Spain, aims to create a compelling web application that combines biodiversity - information with other environmental factors - displaying this information on a map and - as summary statistics.

- -

The Global Biodiversity Information Facility (GBIF), - a government funded open data initiative that spans over 600 institutions worldwide, has - mobilised more than 435 million records - describing the occurrence of flora and fauna.

- -

Included in their data holdings is - "Sinfonevada: Dataset of Floristic diversity in Sierra Nevada forest (SE Spain)", - containing around 8000 records belonging to 270 taxa collected between January 2004 and - December 2005.

- -

As with the majority of datasets published via GBIF, the Sinfonevada dataset is available - in the Darwin Core Archive - format (DwC-A).

- -

In accordance with the DwC-A specification, the Sinfonevada dataset is packaged as a zip - file containing:

-
    -
  • tab delimited tabular data file: occurrence.txt
  • -
  • metadata describing that tabular data file: meta.xml
  • -
  • supplementary dataset metadata: eml.xml
  • -
- -

The metadata file included in the zip package must always be named meta.xml, - whilst the tabular data file and supplementary metadata are explicitly identified within the - main metadata file.

- -

A copy of the zip package is provided for reference. Snippets of - the tab delimited tabular data file and the full metadata file "meta.xml" are provided below.

- -
Example 33
"occurrence.txt"
-----------------
-
-id	modified	institutionCode	collectionCode	basisOfRecord	catalogNumber	eventDate	fieldNumber	continent	countryCode	stateProvince	county	locality	minimumElevationInMeters	maximumElevationInMeters	decimalLatitude	decimalLongitude	coordinateUncertaintyInMeters	scientificName	kingdom	phylum	class	order	family	genus	specificEpithet	infraspecificEpithet	scientificNameAuthorship
-OBSNEV:SINFONEVADA:SINFON-100-005717-20040930	2013-06-20T11:18:18	OBSNEV	SINFONEVADA	HumanObservation	SINFON-100-005717-20040930	2004-09-30 & 2004-09-30		Europe	ESP	GR	ALDEIRE		1992	1992	37.12724018	-3.116135071	1	Pinus sylvestris Lour.	Plantae	Pinophyta	Pinopsida	Pinales	Pinaceae	Pinus	sylvestris		Lour.
-OBSNEV:SINFONEVADA:SINFON-100-005966-20040930	2013-06-20T11:18:18	OBSNEV	SINFONEVADA	HumanObservation	SINFON-100-005966-20040930	2004-09-30 & 2004-09-30		Europe	ESP	GR	ALDEIRE		1992	1992	37.12724018	-3.116135071	1	Berberis hispanica Boiss. & Reut.	Plantae	Magnoliophyta	Magnoliopsida	Ranunculales	Berberidaceae	Berberis	hispanica		Boiss. & Reut.
-OBSNEV:SINFONEVADA:SINFON-100-008211-20040930	2013-06-20T11:18:18	OBSNEV	SINFONEVADA	HumanObservation	SINFON-100-008211-20040930	2004-09-30 & 2004-09-30		Europe	ESP	GR	ALDEIRE		1992	1992	37.12724018	-3.116135071	1	Genista versicolor Boiss. ex Steud.	Plantae	Magnoliophyta	Magnoliopsida	Fabales	Fabaceae	Genista	versicolor		Boiss. ex Steud.
-{snip}
- -

The key variance of this tabular data file from RFC 4180 is the use of TAB - %x09 as the cell delimiter and LF %x0A as the row - terminator.

- -

Also note the use of two adjacent TAB characters to indicate an empty cell.

- -
Example 34
"meta.xml"
-----------
-
-<archive xmlns="http://rs.tdwg.org/dwc/text/" metadata="eml.xml">
-  <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
-    <files>
-      <location>occurrence.txt</location>
-    </files>
-    <id index="0" />
-    <field index="1" term="http://purl.org/dc/terms/modified"/>
-    <field index="2" term="http://rs.tdwg.org/dwc/terms/institutionCode"/>
-    <field index="3" term="http://rs.tdwg.org/dwc/terms/collectionCode"/>
-    <field index="4" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
-    <field index="5" term="http://rs.tdwg.org/dwc/terms/catalogNumber"/>
-    <field index="6" term="http://rs.tdwg.org/dwc/terms/eventDate"/>
-    <field index="7" term="http://rs.tdwg.org/dwc/terms/fieldNumber"/>
-    <field index="8" term="http://rs.tdwg.org/dwc/terms/continent"/>
-    <field index="9" term="http://rs.tdwg.org/dwc/terms/countryCode"/>
-    <field index="10" term="http://rs.tdwg.org/dwc/terms/stateProvince"/>
-    <field index="11" term="http://rs.tdwg.org/dwc/terms/county"/>
-    <field index="12" term="http://rs.tdwg.org/dwc/terms/locality"/>
-    <field index="13" term="http://rs.tdwg.org/dwc/terms/minimumElevationInMeters"/>
-    <field index="14" term="http://rs.tdwg.org/dwc/terms/maximumElevationInMeters"/>
-    <field index="15" term="http://rs.tdwg.org/dwc/terms/decimalLatitude"/>
-    <field index="16" term="http://rs.tdwg.org/dwc/terms/decimalLongitude"/>
-    <field index="17" term="http://rs.tdwg.org/dwc/terms/coordinateUncertaintyInMeters"/>
-    <field index="18" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
-    <field index="19" term="http://rs.tdwg.org/dwc/terms/kingdom"/>
-    <field index="20" term="http://rs.tdwg.org/dwc/terms/phylum"/>
-    <field index="21" term="http://rs.tdwg.org/dwc/terms/class"/>
-    <field index="22" term="http://rs.tdwg.org/dwc/terms/order"/>
-    <field index="23" term="http://rs.tdwg.org/dwc/terms/family"/>
-    <field index="24" term="http://rs.tdwg.org/dwc/terms/genus"/>
-    <field index="25" term="http://rs.tdwg.org/dwc/terms/specificEpithet"/>
-    <field index="26" term="http://rs.tdwg.org/dwc/terms/infraspecificEpithet"/>
-    <field index="27" term="http://rs.tdwg.org/dwc/terms/scientificNameAuthorship"/>
-  </core>
-</archive>
- -

The metadata file specifies:

-
    -
  • a link to the supplementary metadata file eml.xml
  • -
  • the data file encoding UTF-8
  • -
  • cell delimiter
  • -
  • row terminator
  • -
  • cell escaping
  • -
  • the number of rows to skip at the beginning of the file before the data section (e.g. the length of the header section)
  • -
  • the type of entity that each row in the tabular dataset describes
  • -
  • the name of the tabular data file occurrence.txt
  • -
  • the column which provides the unique identifier for the entity described by each row
  • -
  • the property type associated with each column based on the column number index
  • -
- -

Requires: - NonStandardCellDelimiter, - ZeroEditAdditionOfSupplementaryMetadata and - AnnotationAndSupplementaryInfo.

- -

The ignoreHeaderLines attribute can be used to ignore files with column - headings or preamble comments.

- -

In this particular case, the tabular data file is packaged within the zip file, and - is referenced locally. However, the DwC-A specification also supports annotation of remote - tabular data files, and thus does not require any modification of the source datafiles - themselves.

- -

Requires: - LinkFromMetadataToData and - IndependentMetadataPublication.

- -

Although not present in this example, DwC-A also supports the ability to specify - a property-value pair that is applied to every row in the tabular data file, or, - in the case of sparse data, for that property-value pair to be added where the property - is absent from the data file (e.g. providing a default value for a property).

- -

Requires: - SpecificationOfPropertyValuePairForEachRow.

- -

Future releases of DwC-A also seek to provide stronger typing of data formats; - at present only date formats are validated.

- -

Requires: - SyntacticTypeDefinition.

- -

Whilst the DwC-A format is embedded in many software platforms, including web based tools, - none of these seem to fit the needs of the citizen scientist. They want to use existing - javascript libraries such as Leaflet, an open-Source - javascript library for interactive maps, where possible to simplify their web development - effort.

- -

Leaflet has good support for GeoJSON, a JSON format - for encoding a variety of geographic data structures.

- -

In the absence of standard tooling, the citizen scientist needs to write a custom parser - to convert the tab delimited data into GeoJSON. An example GeoJSON object resulting from - this transformation is provided below.

- -
Example 35
{
-    "type": "Feature",
-    "id": "OBSNEV:SINFONEVADA:SINFON-100-005717-20040930",
-    "properties": {
-        "modified": "2013-06-20T11:18:18",
-        "institutionCode": "OBSNEV",
-        "collectionCode": "SINFONEVADA",
-        "basisOfRecord": "HumanObservation",
-        "catalogNumber": "SINFON-100-005717-20040930",
-        "eventDate": "2004-09-30 & 2004-09-30",
-        "fieldNumber": "",
-        "continent": "Europe",
-        "countryCode": "ESP",
-        "stateProvince": "GR",
-        "county": "ALDEIRE",
-        "locality": "",
-        "minimumElevationInMeters": "1992",
-        "maximumElevationInMeters": "1992",
-        "coordinateUncertaintyInMeters": "1",
-        "scientificName": "Pinus sylvestris Lour.",
-        "kingdom": "Plantae",
-        "phylum": "Pinophyta",
-        "class": "Pinopsida",
-        "order": "Pinales",
-        "family": "Pinaceae",
-        "genus": "Pinus",
-        "specificEpithet": "sylvestris",
-        "infraspecificEpithet": "",
-        "scientificNameAuthorship": "Lour."
-    },
-    "geometry": {
-        "type": "Point",
-        "coordinates": [-3.116135071, 37.12724018, 1992]
-    }
-}
- -
Note

- GeoJSON coordinates are specified in order of longitude, latitude and, optionally, altitude. -

- -

Requires: - CsvToJsonTransformation.

- -

The citizen scientist notes that many of the terms in a given row are drawn from controlled - vocabularies; geographic names and taxonomies. For the application, they want to be able to - refer to the authoritative definitions for these controlled vocabularies, say, to provide - easy access for users of the application to the definitions of scientific terms such as "Pinophyta".

- -

Requires: - AssociationOfCodeValuesWithExternalDefinitions.

- -

Thinking to the future of their application, our citizen scientist anticipates the need - to aggregate data across multiple datasets; each of which might use different column headings - depending on who compiled the tabular dataset. Furthermore, how can one be sure they are - comparing things of equivalent type?

- -

To remedy this, they want to use the definitions from the metadata file - meta.xml. The easiest approach to achieve this is to modify their parser - to export [json-ld] and transform the tabular data into RDF that can be easily - reconciled.

- -

The resultant "GeoJSON-LD" takes the form (edited for brevity):

- -
Example 36
{
-    "@context": {
-        "base": "http://www.gbif.org/dataset/db6cd9d7-7be5-4cd0-8b3c-fb6dd7446472/",
-        "Feature": "http://example.com/vocab#Feature",
-        "Point": "http://example.com/vocab#Point",
-        "modified": "http://purl.org/dc/terms/modified",
-        "institutionCode": "http://rs.tdwg.org/dwc/terms/institutionCode",
-        "collectionCode": "http://rs.tdwg.org/dwc/terms/collectionCode",
-        "basisOfRecord": "http://rs.tdwg.org/dwc/terms/basisOfRecord",
-{snip}
-    },
-    "type": "Feature",
-    "@type": "http://rs.tdwg.org/dwc/terms/Occurrence",
-    "id": "OBSNEV:SINFONEVADA:SINFON-100-005717-20040930",
-    "@id": "base:OBSNEV:SINFONEVADA:SINFON-100-005717-20040930",
-    "properties": {
-        "modified": "2013-06-20T11:18:18",
-        "institutionCode": "OBSNEV",
-        "collectionCode": "SINFONEVADA",
-        "basisOfRecord": "HumanObservation",
-{snip}
-    },
-    "geometry": {
-        "type": "Point",
-        "coordinates": [-3.116135071, 37.12724018, 1992]
-    }
-}
- -

The complete JSON object may be retrieved here.

- -

The unique identifier for each "occurrence" record has been mapped to - a URI by appending the local identifier (from column id) - to the URI of the dataset within which the record occurs.

- -

Requires: - URIMapping, - SemanticTypeDefinition and - CsvToRdfTransformation.

- -
Note
-

The @type of the entity is taken from the rowType attribute - within the metadata file.

-
- -
Note
-

The amendment of the GeoJSON specification to include JSON-LD is a work in progress at the time - of writing. Details can be found on the GeoJSON GitHub.

-
- -
Note
-

It is the hope of the DwC-A format specification authors that the availability - of general metadata vocabulary for describing CSV files, or indeed any tabular text - datasets, will mean that DwC-A can be deprecated. This would allow the biodiversity - community, and initiatives such as GBIF, to spend their efforts developing tools that - support the generic standard rather than their own domain specific conventions and - specifications, thus increasing the accessibility of biodiversity data.

- -

To achieve this goal, it is essential that the key characteristics of the DwC-A format - can be adequately described, thus enabling the general metadata vocabulary to be adopted - without needing to modify the existing DwC-A encoded data holdings.

-
- -
- -
-

2.22 Use Case #22 - Making sense of other people's data

-

- (Contributed by Steve Peters via Phil Archer with input from Ian Makgill) -

-

spendnetwork.com harvests spending data from multiple UK local and central government CSV files. - It adds new metadata and annotations to the data and cross-links suppliers to OpenCorporates and, - elsewhere, is beginning to map transaction types to different categories of spending.

-

For example, East Sussex County Council publishes its - spending data as Excel spreadsheets.

-

A snippet of data from East Sussex County Council indicating payments over £500 for the second financial quarter of 2011 is below to illustrate. - White space has been added for clarity. The full data file for that period (saved in CSV format from Microsoft Excel 2007) is provided here: - ESCC-payment-data-Q2281011.csv

-
Example 37
Transparency Q2 - 01.07.11 to 30.09.11 as at 28.10.11,,,,,
-                         Name,          Payment category,   Amount,                        Department,Document no.,Post code
-{snip}
-               MARTELLO TAXIS,   Education HTS Transport,     £620,"Economy, Transport & Environment",  7000785623,     BN25
-               MARTELLO TAXIS,   Education HTS Transport, "£1,425","Economy, Transport & Environment",  7000785624,     BN25
-MCL TRANSPORT CONSULTANTS LTD,        Passenger Services, "£7,134","Economy, Transport & Environment",  4500528162,     BN25
-MCL TRANSPORT CONSULTANTS LTD,Concessionary Fares Scheme,"£10,476","Economy, Transport & Environment",  4500529102,     BN25
-{snip}
-

This data is augmented by spendnetwork.com and presented in a Web page. - The web page for East Sussex County Council is illustrated in Fig. 5 Payments over £500 for East Sussex County Council July-Sept 2011, illustrated by spendnetwork

-
- spendnetwork1.png -
Fig. 5 Payments over £500 for East Sussex County Council July-Sept 2011, illustrated by spendnetwork
-
-

Notice the Linked Data column that links to - OpenCorporates data on MCL Transport Consultants Ltd. - If we follow the 'more' link we see many more cells that spendnetwork would like to include (see - Fig. 6 Payment transaction details, illustrated by spendnetwork). Where data is available - from the original spreadsheet it has been included.

-
- spendnetwork2.png -
Fig. 6 Payment transaction details, illustrated by spendnetwork
-
-

The schema here is defined by a third party (spendnetwork.com) to make sense of the original data within their own model - (only some of which is shown here, spendnetwork.com also tries to categorize transactions and more). This model exists independently of - multiple source datasets and entails a mechanism for reusers to link to the original data from the metadata. - Published metadata can be seen variously as feedback, advertising, enrichment or annotations. Such information could help the publisher to - improve the quality of the original source, however, for the community at large it reduces the need for repetition of the work done to make - sense of the data and facilitates a network effect. It may also be the case that - the metadata creator is better able to put the original data into a wider context with more accuracy and commitment than the original publisher.

-

Another (similar) scenario is LG-Inform. This harvests government statistics from multiple sources, - many in CSV format, and calculates rates, percentages & trends etc. and packages them as a set of performance metrics/measures. Again, it - would be very useful for the original publisher to know, through metadata, that their source has been defined and used (potentially alongside - someone else's data) in this way.

-

See http://standards.esd.org.uk/ and the "Metrics" tab therein; e.g. - percentage of measured children in reception year classified as obese (3333).

-

The analysis of datasets undertaken by both spendnetwork.com and LG-Inform to make sense of other people's tabular data is time-consuming - work. Making that metadata available is a potential help to the original data publisher as well as other would-be reusers of it.

-

- Requires: - WellFormedCsvCheck, - IndependentMetadataPublication, - ZeroEditAdditionOfSupplementaryMetadata, - AnnotationAndSupplementaryInfo, - AssociationOfCodeValuesWithExternalDefinitions, - SemanticTypeDefinition, - URIMapping and - LinkFromMetadataToData. -

-
- -
-

2.23 Use Case #23 - Collating humanitarian information for crisis response

-

- (Contributed by Tim Davies) -

- -

During a crisis response, information managers within the humanitarian community face - a significant challenge in trying to collate data regarding humanitarian needs and response - activities conducted by a large number of humanitarian actors. The schemas for these data - sets are generally not standardized across different actors nor are the mechanisms for - sharing the data. In the best case, this results in a significant delay between the collection - of data and the formulation of that data into a common operational picture. In the worst case, - information is simply not shared at all, leaving gaps in the understanding of the field situation. -

- -

The Humanitarian eXchange Language - (HXL) project seeks to address this concern; enabling information from diverse parties to be - collated into a single "Humanitarian Data Registry". Supporting tools are provided to assist - participants in a given response initiative in finding information within this registry to - meet their needs.

- -

The HXL standard is designed to be a common publishing format for humanitarian data. - A key design principle of the HXL project is that the data publishers are able to continue - publication of their data using their existing systems. Unsurprisingly, data publishers - often provide their data in tabular formats such as CSV, having exported the content from - spreadsheet applications. As a result, the HXL standard is entirely based on tabular data.

- -

During their engagement with the humanitarian response community, the HXL project team have identified two major concerns when working with tabular data:

-
    -
  • Tabular data needs to be created, read by and exchanged between people speaking different languages. Many of these are basic spreadsheet users who find it far easier to use data with natural and clear language in the column headings. Having the column headings in their own language makes creating and interpreting the data a lot easier.
  • -
  • Tabular data needs to be created that contains literal values in multiple languages. For example, the name of a town in English, French and Arabic. The total number of languages that the data might be expressed in cannot be easily determined in advance, and it should be possible for a data manager to introduce a new language variant of a column easily.
  • -
- -

To address these issues, the HXL project have developed a number of - conventions - for publishing tabular data in CSV format.

- -

Column headings in the tabular data are supplemented with short hashtags that are defined in the HXL hashtag dictionary. The hashtag provides the normative meaning of the data in the column while the column header from the original data, a literal text string, is informative. This allows software systems to quickly ascertain the meaning of the data irrespective of the column heading and language used in the original data. For example, where a column provides information on the numbers of people affected by an emergency, the heading may be one of: "People affected", "Affected", "# de personnes concernées", "Afectadas/os" etc. The hashtag #affected is used to provide a common key to interpret the data.

- -
Example 38
. Cluster,     District,  People affected,   People reached
-  #sector,        #adm1,        #affected,         #reached
-     WASH,        Coast,             9000,             9000
-     WASH,    Mountains,             1000,              200
-Education,        Coast,            15500,             8000
-Education,    Mountains,              750,              600
-   Health,        Coast,            20000,             3500
-   Health,    Mountains,             3500,             1500
- -

(whitespace included for clarity)

- -

Requires: - MultipleHeadingRows and - SemanticTypeDefinition.

- -

Hashtags may be supplemented with attributes to refine the meaning of the data. A suggested set of attributes is provided in the HXL hashtag dictionary. For example, attributes may be used to specify the language used for the text in a given column using "+" followed by an ISO 639 language code:

- -
Example 39
.    Project title,             Titre du projet
-      #activity+en,                #activity+fr
-Malaria treatments,     Traitement du paludisme
-  Teacher training,Formation des enseignant(e)s
- -

(whitespace included for clarity)

- -

Requires: - MultilingualContent.

- -

Where multiple data-values for a given field code are provided in a single - row, the field code is repeated - as illustrated in the example below that provides - geocodes for multiple locations pertaining to the subject of the record.

- -
Example 40
P-code  1,P-code  2,P-code  3
-#loc+code,#loc+code,#loc+code
-   020503,         ,
-   060107,   060108,
-   173219,         ,
-   530012,   530013,   530015
-   279333,         ,
- -

(whitespace included for clarity)

- -

Requires: - RepeatedProperties.

- -

In the example above, we see an often repeated pattern where data includes codes to reference some authoritative - term, definition or other resource; e.g. the location code 020503. In order - to make sense of the data, these codes must be reconciled with their official definitions.

- -

Requires: - AssociationOfCodeValuesWithExternalDefinitions.

- -

A snippet of an example of a tabular HXL data file is provided below. A local copy - of the HXL data file is also available: - HXL_3W_samples_draft_Multilingual.csv.

- -
Example 41
Fecha del informe,      Fuente,     Implementador,Código de sector,       Sector / grupo,   Sector / group,    Subsector,     País,Código de provincia, Province,    Region,Código del municipio,Municipality
-   #date+reported,#meta+source,              #org,    #sector+code,           #sector+es,       #sector+en,#subsector+en, #country,         #adm1+code, #adm1+en,#region+en,          #adm2+code,    #adm2+en
-       2013-11-19,Mapaction OP,      World VISION,             S01,Refugio de emergencia,Emergency Shelter,             ,Filipinas,           60400000,    Aklan,        VI,                    ,
-       2013-11-19,   DHNetwork,DFID Medical Teams,             S02,                Salud,           Health,             ,         ,           60400000,    Aklan,        VI,                    ,
-       2013-11-19,   DHNetwork,               MSF,             S02,                Salud,           Health,             ,         ,           60400000,    Aklan,        VI,                    ,
-       2013-11-19,  Cluster 3W,     LDS Charities,             S03,                 WASH,             WASH,      Hygiene,Filipinas,           60400000,    Aklan,        VI,                    ,
-{snip}
- -

(whitespace included for clarity)

- -
-
-

2.24 Use Case #24 - Expressing a hierarchy within occupational listings

-

- (Contributed by Dan Brickley) -

- -

Our user intends to analyze the current state of the job market using information gleaned - from job postings that are published using schema.org markup.

- -
Note

schema.org defines a schema for a listing that describes a - job opening within an organization: JobPosting.

- -

One of the things our user wants to do is to organise the job postings into categories - based on the occupationalCategory - property of each JobPosting.

- -

The occupationalCategory property is used to categorize the described job. The - O*NET-SOC Taxonomy is schema.org's recommended - controlled vocabulary for the occupational categories.

- -

The schema.org documentation notes that the value of the occupationalCategory property - should include both the textual label and the formal code from the O*NET-SOC Taxonomy, as - illustrated below in the following RDFa snippet:

- -
Example 42
<br><strong>Occupational Category:</strong> <span property="occupationalCategory">15-1199.03 Web Administrators</span>
- -

The O*NET-SOC Taxonomy is republished every few years; the - occupational listing for 2010 - is the most recent version available. This - listing is also available in CSV format. - An extract from this file is provided below. A local copy of this CSV file is also available: - file = 2010_Occupations.csv.

- -
Example 43
O*NET-SOC 2010 Code,O*NET-SOC 2010 Title,O*NET-SOC 2010 Description
-{snip}
-15-1199.00,"Computer Occupations, All Other",All computer occupations not listed separately.
-15-1199.01,Software Quality Assurance Engineers and Testers,Develop and execute software test plans in order to identify software problems and their causes.
-15-1199.02,Computer Systems Engineers/Architects,"Design and develop solutions to complex applications problems, system administration issues, or network concerns. Perform systems management and integration functions."
-15-1199.03,Web Administrators,"Manage web environment design, deployment, development and maintenance activities. Perform testing and quality assurance of web sites and web applications."
-15-1199.04,Geospatial Information Scientists and Technologists,"Research or develop geospatial technologies. May produce databases, perform applications programming, or coordinate projects. May specialize in areas such as agriculture, mining, health care, retail trade, urban planning, or military intelligence."
-15-1199.05,Geographic Information Systems Technicians,"Assist scientists, technologists, or related professionals in building, maintaining, modifying, or using geographic information systems (GIS) databases. May also perform some custom application development or provide user support."
-15-1199.06,Database Architects,"Design strategies for enterprise database systems and set standards for operations, programming, and security. Design and construct large relational databases. Integrate new systems with existing warehouse structure and refine system performance and functionality."
-15-1199.07,Data Warehousing Specialists,"Design, model, or implement corporate data warehousing activities. Program and configure warehouses of database information and provide support to warehouse users."
-15-1199.08,Business Intelligence Analysts,Produce financial and market intelligence by querying data repositories and generating periodic reports. Devise methods for identifying data patterns and trends in available information sources.
-15-1199.09,Information Technology Project Managers,"Plan, initiate, and manage information technology (IT) projects. Lead and guide the work of technical staff. Serve as liaison between business and technical aspects of projects. Plan project stages and assess business implications for each stage. Monitor progress to assure deadlines, standards, and cost targets are met."
-15-1199.10,Search Marketing Strategists,"Employ search marketing tactics to increase visibility and engagement with content, products, or services in Internet-enabled devices or interfaces. Examine search query behaviors on general or specialty search engines or other Internet-based content. Analyze research, data, or technology to understand user intent and measure outcomes for ongoing optimization."
-15-1199.11,Video Game Designers,"Design core features of video games. Specify innovative game and role-play mechanics, story lines, and character biographies. Create and maintain design documentation. Guide and collaborate with production staff to produce games as designed."
-15-1199.12,Document Management Specialists,"Implement and administer enterprise-wide document management systems and related procedures that allow organizations to capture, store, retrieve, share, and destroy electronic records and documents."
-{snip}
- -

The CSV file follows the specification outlined in [RFC4180] - including the use of - pairs of double quotes ("") to escape cells that themselves contain commas.

- -

Also note that each row provides a unique identifier for the occupation it describes. This - unique identifier is given in the O*NET-SOC 2010 Code column. This code can be considered - as the primary key for each row in the listing as it is unique for every row. Furthermore, the value - of the O*NET-SOC 2010 Code column serves as the unique identifier for the occupation.

- -

Requires: - PrimaryKey.

- -

Closer inspection of the O*NET-SOC 2010 code illustrates the hierarchical classification - within the taxonomy. The first six digits are based on the - Standard Occupational Classification (SOC) - code from the US Bureau of Labor Statistics, with further subcategorization thereafter where necessary. - The first and second digits represent the major group; the third digit represents the minor group; - the fourth and fifth digits represent the broad occupation; and the sixth digit represents the - detailed occupation.

- -

The SOC structure (2010) is available in Microsoft Excel 97-2003 Workbook format. - An extract of this structure, in CSV format (exported from Microsoft Excel 2007), is provided below. - A local copy of the SOC structure in CSV is also available: file = soc_structure_2010.csv.

- -
Example 44
Bureau of Labor Statistics,,,,,,,,,
-On behalf of the Standard Occupational Classification Policy Committee (SOCPC),,,,,,,,,
-,,,,,,,,,
-January 2009,,,,,,,,,
-*** This is the final structure for the 2010 SOC.   Questions should be emailed to soc@bls.gov***,,,,,,,,,
-,,,,,,,,,
-,,,,,,,,,
-,,,,,,,,,
-,,,,,,,,,
-,2010 Standard Occupational Classification,,,,,,,,
-,,,,,,,,,
-Major Group,Minor Group,Broad Group,Detailed Occupation,,,,,,
-,,,,,,,,,
-11-0000,,,,Management Occupations,,,,,
-,11-1000,,,Top Executives,,,,,
-,,11-1010,,Chief Executives,,,,,
-,,,11-1011,Chief Executives,,,,,
-{snip}
-,,,13-2099,"Financial Specialists, All Other",,,,,
-15-0000,,,,Computer and Mathematical Occupations,,,,,
-,15-1100,,,Computer Occupations,,,,,
-,,15-1110,,Computer and Information Research Scientists,,,,,
-,,,15-1111,Computer and Information Research Scientists,,,,,
-,,15-1120,,Computer and Information Analysts,,,,,
-,,,15-1121,Computer Systems Analysts,,,,,
-,,,15-1122,Information Security Analysts,,,,,
-,,15-1130,,Software Developers and Programmers,,,,,
-,,,15-1131,Computer Programmers,,,,,
-,,,15-1132,"Software Developers, Applications",,,,,
-,,,15-1133,"Software Developers, Systems Software",,,,,
-,,,15-1134,Web Developers,,,,,
-,,15-1140,,Database and Systems Administrators and Network Architects,,,,,
-,,,15-1141,Database Administrators,,,,,
-,,,15-1142,Network and Computer Systems Administrators,,,,,
-,,,15-1143,Computer Network Architects,,,,,
-,,15-1150,,Computer Support Specialists,,,,,
-,,,15-1151,Computer User Support Specialists,,,,,
-,,,15-1152,Computer Network Support Specialists,,,,,
-,,15-1190,,Miscellaneous Computer Occupations,,,,,
-,,,15-1199,"Computer Occupations, All Other",,,,,
-,15-2000,,,Mathematical Science Occupations,,,,,
-{snip}
- -

The header line here comes below an empty row and is separated from the data by another empty row. - There is metadata about the table in the rows above the header line.

- -

Requires: - MultipleHeadingRows and - AnnotationAndSupplementaryInfo.

- -

Being familiar with SKOS, our user decides - to map both the O*NET-SOC and SOC taxonomies into a single hierarchy expressed using RDF/OWL and the - SKOS vocabulary.

- -

Note that in order to express the two taxonomies in SKOS, the local identifiers used in - the CSV files (e.g. 15-1199.03) must be mapped to URIs.

- -

Requires: - URIMapping.

- -

Each of the five levels used across the occupation classification schemes is assigned to a particular OWL class - each of which is a sub-class of skos:Concept:

-
    -
  • From SOC - -
      -
    • Major Group: ex:SOC-MajorGroup
    • -
    • Minor Group: ex:SOC-MinorGroup
    • -
    • Broad Group: ex:SOC-BroadGroup
    • -
    • Detailed Occupation: ex:SOC-DetailedOccupation
    • -
    -
  • -
  • From O*NET-SOC - -
      -
    • ex:ONETSOC-Occupation
    • -
    -
  • -
- -

The SOC taxonomy contains four different types of entities, and so requires several different - passes to extract each of those from the CSV file. Depending on which kind of entity is being - extracted, a different column provides the unique identifier for the entity. Data in a given - row is only processed if the value for the cell designated as the unique identifier is not blank. - For example, if the Detailed Occupation column is designated as providing the - unique identifier (e.g. to extract entities of type ex:SOC-DetailedOccupation), - then the only rows to be processed in the snippet below would be "Financial Specialists, All Other", - "Computer and Information Research Scientists" and "Computer Occupations, All Other". All other rows - would be ignored.

- -
Example 45
{snip}
-Major Group,Minor Group,Broad Group,Detailed Occupation,                                            ,,,,,
-           ,           ,           ,                   ,                                            ,,,,,
-{snip}
-           ,           ,           ,            13-2099,          "Financial Specialists, All Other",,,,,
-    15-0000,           ,           ,                   ,       Computer and Mathematical Occupations,,,,,
-           ,    15-1100,           ,                   ,                        Computer Occupations,,,,,
-           ,           ,    15-1110,                   ,Computer and Information Research Scientists,,,,,
-           ,           ,           ,            15-1111,Computer and Information Research Scientists,,,,,
-{snip}
-           ,           ,    15-1190,                   ,          Miscellaneous Computer Occupations,,,,,
-           ,           ,           ,            15-1199,           "Computer Occupations, All Other",,,,,
-           ,    15-2000,           ,                   ,            Mathematical Science Occupations,,,,,
-{snip}
- -

(whitespace added for clarity)

- -

Requires: - ConditionalProcessingBasedOnCellValues.

- -

The hierarchy in the SOC structure is implied by inheritance from - the preceding row(s). For example, the row describing SOC minor group "Computer Occupations" - (Minor Group = 15-1100, above) has an empty cell value for column Major Group. - The value for SOC major group is provided by the preceding row. In the case of SOC detailed - occupation "Computer Occupations, All Other" (Detailed Occupation = 15-1199), - the value for column Major Group is provided 20 lines previously when a value - in that column was most recently provided. The example snippet below illustrates what the CSV would - look like if the inherited cell values were present:

- - -
Example 46
{snip}
-Major Group,Minor Group,Broad Group,Detailed Occupation,                                            ,,,,,
-           ,           ,           ,                   ,                                            ,,,,,
-{snip}
-    13-0000,    13-2000,    13-2090,            13-2099,          "Financial Specialists, All Other",,,,,
-    15-0000,           ,           ,                   ,       Computer and Mathematical Occupations,,,,,
-    15-0000,    15-1100,           ,                   ,                        Computer Occupations,,,,,
-    15-0000,    15-1100,    15-1110,                   ,Computer and Information Research Scientists,,,,,
-    15-0000,    15-1100,    15-1110,            15-1111,Computer and Information Research Scientists,,,,,
-{snip}
-    15-0000,    15-1100,    15-1190,                   ,          Miscellaneous Computer Occupations,,,,,
-    15-0000,    15-1100,    15-1190,            15-1199,           "Computer Occupations, All Other",,,,,
-    15-0000,    15-2000,           ,                   ,            Mathematical Science Occupations,,,,,
-{snip}
- -

(whitespace added for clarity)

- -

It is difficult to programmatically describe how the inherited values should be implemented. - It is not as simple as inferring the value for a blank cell from the most recent preceding row - when a non-blank value was provided for that column. For example, the last row in the example - above describing "Mathematical Science Occupations" does not inherit the values from columns - Broad Group and Detailed Occupation in the preceding row because - it describes a new level in the hierarchy.

- -

However, given that the SOC code is a string value with regular structure that reflects - the position of a given concept within the hierarchy, it is possible to determine the - identifier of each of the broader concepts by parsing the identifier string. For example, - the regular expression /^(\d{2})-(\d{2})(\d)\d$/ could be used to split the - identifier for a detailed occupation code into its constituent parts from which the - identifiers for the associated broader concepts could be constructed.

- -

Requires: - CellMicrosyntax.

- -

The same kind of processing applies to the O*NET-SOC taxonomy; in this case also extracting - a description for the occupation. There is also an additional complication: where an - O*NET-SOC code ends in ".00", that occupation is a direct mapping to the - occupation defined in the SOC taxonomy. For example, the O*NET-SOC code 15-1199.00 - refers to the same occupation category as the SOC code 15-1199: - "Computer Occupations, All Other".

- -

To implement this complication, we need to use conditional processing.

-

If the final two digits of the O*NET-SOC code are "00", then:

-
    -
  • the entity is of type ex:SOC-DetailedOccupation;
  • -
  • the unique identifier and notation for the concept comprises only the six numerical digits of the O*NET-SOC 2010 Code cell value (e.g. in the form nn-nnnn); and
  • -
  • no skos:broader relationship need be defined.
  • -
- -

else:

-
    -
  • the entity is of type ex:ONETSOC-Occupation;
  • -
  • the unique identifier and notation for the concept comprises the eight numerical digits of the O*NET-SOC 2010 Code cell value (e.g. in the form nn-nnnn.nn); and
  • -
  • a skos:broader relationship is defined with the broader concept from the SOC taxonomy identified by the first six numerical digits of the O*NET-SOC 2010 Code cell value.
  • -
- -

The example below illustrates the conditional behaviour:

- -
Example 47
row:
-----
-
-15-1199.00,"Computer Occupations, All Other",All computer occupations not listed separately.
-
-resulting RDF (in Turtle syntax):
----------------------------------
-
-ex:15-1199 a ex:SOC-DetailedOccupation ;
-    skos:notation "15-1199" ;
-    skos:prefLabel "Computer Occupations, All Other" ;
-    dct:description "All computer occupations not listed separately." .
-
-row:
-----
-
-15-1199.03,Web Administrators,"Manage web environment design, deployment, development and maintenance activities. Perform testing and quality assurance of web sites and web applications."
-
-resulting RDF (in Turtle syntax):
----------------------------------
-
-ex:15-1199.03 a ex:ONETSOC-Occupation ;
-    skos:notation "15-1199.03" ;
-    skos:prefLabel "Web Administrators" ;
-    dct:description "Manage web environment design, deployment, development and maintenance activities. Perform testing and quality assurance of web sites and web applications." ;
-    skos:broader ex:15-1199 .
- -

Requires: - ConditionalProcessingBasedOnCellValues.

- -

A snippet of the final SKOS concept scheme, expressed in RDF using Turtle [turtle] syntax, resulting - from transformation of the O*NET-SOC and SOC taxonomies into RDF is provided below. Ideally, all - duplicate triples will be removed - such as the skos:prefLabel - property for concept ex:15-1190 which would be provided by both the O*NET-SOC and SOC CSV files.

- -
Example 48
ex:15-0000 a ex:SOC-MajorGroup ;
-    skos:notation "15-0000" ;
-    skos:prefLabel "Computer and Mathematical Occupations" .
-ex:15-1100 a ex:SOC-MinorGroup ;
-    skos:notation "15-1100" ;
-    skos:prefLabel "Computer Occupations" ;
-    skos:broader ex:15-0000 .
-ex:15-1190 a ex:SOC-BroadGroup ;
-    skos:notation "15-1190" ;
-    skos:prefLabel "Miscellaneous Computer Occupations" ;
-    skos:broader ex:15-0000, ex:15-1100 .
-ex:15-1199 a ex:SOC-DetailedOccupation ;
-    skos:notation "15-1199" ;
-    skos:prefLabel "Computer Occupations, All Other" ;
-    dct:description "All computer occupations not listed separately." ;
-    skos:broader ex:15-0000, ex:15-1100, ex:15-1190 .
-ex:15-1199.03 a ex:ONETSOC-Occupation ;
-    skos:notation "15-1199.03" ;
-    skos:prefLabel "Web Administrators" ;
-    dct:description "Manage web environment design, deployment, development and maintenance activities. Perform testing and quality assurance of web sites and web applications." ;
-    skos:broader ex:15-0000, ex:15-1100, ex:15-1190, ex:15-1199 .
- -

Once the SKOS concept scheme has been defined, it is possible for our user to group - job postings by SOC Major Group, SOC Minor Group, SOC Broad Group, SOC Detailed Occupation - and O*NET-SOC Occupation to provide summary statistics about the job market.

- -

For example, we can use the SKOS concept scheme to group job postings for "Web Administrators" (code 15-1199.03) as follows:

-
    -
  • 15-0000 "Computer and Mathematical Occupations" (SOC major group)
  • -
  • 15-1100 "Computer Occupations" (SOC minor group)
  • -
  • 15-1190 "Miscellaneous Computer Occupations" (SOC broad occupation)
  • -
  • 15-1199 "Computer Occupations, All Other" (SOC detailed occupation)
  • -
  • 15-1199.03 "Web Administrators"
  • -
-
-
-

2.25 Use Case #25 - Consistent publication of local authority data

-

Open data and transparency are foundational elements within the UK Government's approach to improve public service. The Local Government Association (LGA) promotes open and transparent local government to meet local needs and demands; to innovate and transform services leading to improvements and efficiencies, to drive local economic growth and to empower citizen and community groups to choose or run services and shape neighbourhoods.

- -

As part of this initiative, the LGA is working to put local authority data into the public realm in ways that provide real benefits to citizens, business, councils and the wider data community. The LGA provides a web portal to help identify open data published by UK local authorities and encourage standardisation of local open data; enabling data consumers to browse through datasets published by local authorities across the UK and providing guidance and tools to data publishers to drive consistent practice in publication.

- -

Data is typically published in CSV format.

- -

An illustrative example is provided for data describing public toilets. The portal lists datasets of information about public toilets provided by more than 70 local authorities. In order to ensure consistent publication of data about public toilets the LGA provides both guidance documentation and a machine-readable schema against which datasets may be validated using on-line tools.

- -

The public toilets CSV schema has 32 (mandated or optional) fields. The validator tool allows columns to appear in any order, matching the column order to the schema based on the title in the column header. Furthermore, CSV files containing additional columns, such as SecureDisposalofSharps specified within the public toilet dataset for Bath and North East Somerset (as shown below), are also considered valid. Additional columns are included where one or more local authorities have specific requirements to include additional information to satisfy local needs. Such additional columns are not supported using formal 'extensions' of the schema as the organisational and administrative burden of doing so was considered too great.

- -
Example 49
ExtractDate,OrganisationURI,OrganisationLabel,ServiceTypeURI,ServiceTypeLabel,LocationText,StreetAddress,LocalityAddress,TownAddress,Postcode,GeoAreaWardURI,GeoAreaWardLabel,UPRN,CoordinateReferenceSystem,GeoX,GeoY,GeoPointLicensingURL,Category,AccessibleCategory,BabyChange,SecureDisposalofSharps,OpeningHours,ManagingBy,ChargeAmount,Notes
-15/09/2014,http://opendatacommunities.org/id/unitary-authority/bath-and-north-east-somerset,Bath and North East Somerset,http://id.esd.org.uk/service/579,Public Toilets,CHARLOTTE STREET ENTRANCE,CHARLOTTE STREET,KINGSMEAD,BATH,BA1 2NE,http://statistics.data.gov.uk/id/statistical-geography/E05001949,Kingsmead,10001147066,OSGB36,374661,165006,http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/,Female and male,Female and male,TRUE,TRUE,24 Hours ,BANES COUNCIL AND HEALTHMATIC,0.2,
-15/09/2014,http://opendatacommunities.org/id/unitary-authority/bath-and-north-east-somerset,Bath and North East Somerset,http://id.esd.org.uk/service/579,Public Toilets,ALICE PARK,GLOUCESTER ROAD,LAMBRIDGE,BATH,BA1 7BL,http://statistics.data.gov.uk/id/statistical-geography/E05001950,Lambridge,10001146447,OSGB36,376350,166593,http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/,Female and male,Female and male,TRUE,TRUE,06:00-21:00,BANES COUNCIL AND HEALTHMATIC,0.2,
-15/09/2014,http://opendatacommunities.org/id/unitary-authority/bath-and-north-east-somerset,Bath and North East Somerset,http://id.esd.org.uk/service/579,Public Toilets,HENRIETTA PARK,HENRIETTA ROAD,ABBEY,BATH,BA2 6LU,http://statistics.data.gov.uk/id/statistical-geography/E05001935,Abbey,10001147120,OSGB36,375338,165170,http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/,Female and male,Female and male,FALSE,Female and male,Winter & Su 10:00-16:00 | Other times: 08:00-18:00,BANES COUNCIL AND HEALTHMATIC,0,Scheduled for improvement Autumn 2014
-15/09/2014,http://opendatacommunities.org/id/unitary-authority/bath-and-north-east-somerset,Bath and North East Somerset,http://id.esd.org.uk/service/579,Public Toilets,SHAFTESBURY ROAD,SHAFTESBURY ROAD,OLDFIELD ,BATH,BA2 3LH,http://statistics.data.gov.uk/id/statistical-geography/E05001958,Oldfield,10001147060,OSGB36,373809,164268,http://www.nationalarchives.gov.uk/doc/open-government-licence/version/2/,Female and male,Female and male,TRUE,TRUE,24 Hours ,BANES COUNCIL AND HEALTHMATIC,0.2,
-{snip}
- -

A local copy of this dataset is included for convenience.

- -

Requires: - WellFormedCsvCheck, - CsvValidation and - SyntacticTypeDefinition. -

-
-
- -
-

3. Requirements

-
-

3.1 Accepted requirements

-
-

3.1.1 CSV parsing requirements

-
-
R-NonStandardCellDelimiter
-
- - Ability to parse tabular data with cell delimiters other than comma (,) - -

Tabular data is often provided with cell delimiters other than comma (,). - Fixed width formatting is also commonly used.

-

If a non-standard cell delimiter is used, it shall be possible to inform the CSV parser about the - cell delimiter or fixed-width formatting.

-

- Motivation: - DisplayingLocationsOfCareHomesOnAMap, - SurfaceTemperatureDatabank, - SupportingSemantic-basedRecommendations, - PublicationOfBiodiversityInformation and - PlatformIntegrationUsingSTDF. -

-
Note
-

Standardizing the parsing of CSV is outside the chartered scope of the Working Group. However, [tabular-data-model] section 8. Parsing Tabular Data provides non-normative hints to creators of parsers to help them handle the wide variety of CSV-based formats that they may encounter due to the current lack of standardization of the format.

-

An annotated table may use the delimiter annotation, specified as part of a dialect description, to declare a string that is used to delimit cells in a given row. The default value is ",". See [tabular-metadata] section 5.9 Dialect Descriptions for further details.

-
-
-
R-CommentLines
-
- - Ability to identify comment lines within a CSV file and skip over them during parsing, format conversion or other processing - -

A tabular datafile may include comment lines. It shall be possible to declare how to recognize - a comment line within the data (e.g. by specifying a sequence of characters that are found - at the beginning of every comment line).

-

Comment lines shall not be treated as data when parsing, converting or processing the - CSV file. During format conversion, the application may try to include the comment in the - conversion.

-

- Motivation: - PlatformIntegrationUsingSTDF. -

-
Note
-

Standardizing the parsing of CSV is outside the chartered scope of the Working Group. However, [tabular-data-model] section 8. Parsing Tabular Data provides non-normative hints to creators of parsers to help them handle the wide variety of CSV-based formats that they may encounter due to the current lack of standardization of the format.

-

An annotated table may use the comment prefix annotation, specified as part of a dialect description, to declare a string that, when appearing at the beginning of a row, indicates that the row is a comment that should be associated as a rdfs:comment annotation to the table. The default value is "#". See [tabular-metadata] section 5.9 Dialect Descriptions for further details.

-
-
-
-
-
-

3.1.2 Applications requirements

-
-
R-CsvValidation
-
- - Ability to validate a CSV for conformance with a specified metadata definition - -

The content of a CSV often needs to be validated for conformance against a - specification. A specification may be expressed in machine-readable format as defined in - the Metadata Vocabulary for Tabular Data [tabular-metadata].

-

Validation shall assess conformance against structural definitions such as number of - columns and the datatype for a given column. Further validation needs are to be - determined. It is anticipated that validation may vary based on row-specific attributes - such as the type of entity described in that row.

-

- Dependency: - R-WellFormedCsvCheck -

-

- Motivation: - DigitalPreservationOfGovernmentRecords, - OrganogramData, - ChemicalImaging, - ChemicalStructures, - DisplayingLocationsOfCareHomesOnAMap, - NetCdFcDl, - PaloAltoTreeData and - ConsistentPublicationOfLocalAuthorityData. -

-
Note
-

Validation of tabular data, as specified in [tabular-data-model] section 6.6 Validating Tables, includes the following aspects:

-
    -
  • assessing compatibility of the table with associated metadata - checking the correct number of non-virtual columns and matching names/titles for columns where these are specified in a header row;
  • -
  • ensuring uniqueness of primary keys;
  • -
  • checking that all foreign keys are valid; and
  • -
  • cell validation.
  • -
-

As described in [tabular-data-model] section 4.6 Datatypes, cell validation includes assessment of the literal content of the cell (e.g. length of string or number of bytes) and of the value inferred from parsing that literal content (e.g. formatting and numerical constraints).

-
-
-
R-RightToLeftCsvDeclaration
-
- - Ability to determine that a CSV should be rendered using RTL column ordering and RTL text direction in cells. - -

It shall be possible to declare whether a given tabular data file should be rendered with column order direction Right-to-Left (RTL); e.g. the first column on the far right, with subsequent columns displayed to the left of the preceding column. It shall also be possible to declare that the content of cells in particular columns is rendered RTL.

-

A "RTL aware" application should use the RTL declaration to determine how to display a given data file. Automatic detection of appropriate rendering shall be the default behaviour (in absence of any such declaration).

-
Note
-

The directionality of the content does not affect the logical structure of the tabular data; i.e. the cell at index zero is followed by the cell at index 1, and then index 2 etc. As a result, parsing of RTL tabular data is anticipated to be identical to LTR content.

-
-

- Motivation: - SupportingRightToLeftDirectionality. -

-
Note

- It is possible to set the column direction using the tableDirection property and the text direction on columns using the textDirection property, as defined in [tabular-metadata]. -

-
-
R-CsvToRdfTransformation
-
- - Ability to transform a CSV into RDF - -

Standardised CSV to RDF transformation mechanisms mitigate the need for bespoke - transformation software to be developed by CSV data consumers, thus simplifying the - exploitation of CSV data. Local identifiers for the entity described in a given row or - used to reference some other entity need to be converted to URIs. RDF properties - (or property paths) need to be determined to relate the entity described - within a given row to the corresponding data values for that row. Where available, - the type of a data value should be incorporated in the resulting RDF. Built-in types - defined in RDF 1.1 [rdf11-concepts] (e.g. - xsd:dateTime, - xsd:integer - etc.) and types defined in other RDF vocabularies / OWL ontologies (e.g. geo:wktLiteral, - GeoSPARQL [geosparql] section 8.5.1 RDFS Datatypes refers) shall be supported.

-

- Dependency: - R-SemanticTypeDefinition, - R-SyntacticTypeDefinition and - R-URIMapping. -

-

- Motivation: - DigitalPreservationOfGovernmentRecords, - OrganogramData, - PublicationOfPropertyTransactionData, - RepresentingEntitiesAndFactsExtractedFromText, - CanonicalMappingOfCSV, - PublicationOfBiodiversityInformation and - ExpressingHierarchyWithinOccupationalListings. -

-
Note
-

[csv2rdf] specifies the transformation of an annotated table to RDF; providing both minimal mode, where RDF output includes triples derived from the data within the annotated table, and standard mode, where RDF output additionally includes triples describing the structure of the annotated table.

-

Built-in datatypes are limited to those defined in [tabular-data-model] section 4.6 Datatypes. geo:wktLiteral and other datatypes from [geosparql] are not supported natively.

-
-
-
R-CsvToJsonTransformation
-
- - Ability to transform a CSV into JSON - -

Standardised CSV to JSON transformation mechanisms mitigate the need for bespoke transformation software to be developed by CSV data consumers, thus simplifying the exploitation of CSV data.

-

- Motivation: - DisplayingLocationsOfCareHomesOnAMap, - IntelligentlyPreviewingCSVFiles, - CanonicalMappingOfCSV and - PublicationOfBiodiversityInformation. -

-
Note
-

[csv2json] specifies the transformation of an annotated table to JSON; providing both minimal mode, where JSON output includes objects derived from the data within the annotated table, and standard mode, where JSON output additionally includes objects describing the structure of the annotated table. In both modes, the transformation provides 'prettyfication' of the JSON output where objects are nested rather than forming a flat list of objects with relations.

-

Built-in datatypes from the annotated table, as defined in [tabular-data-model] section 4.6 Datatypes, are mapped to JSON primitive types.

-
-
-
R-CanonicalMappingInLieuOfAnnotation
-
- - Ability to transform CSV conforming to the core tabular data model yet lacking further - annotation into an object / object graph serialisation - -

A CSV conforming with the - core tabular data model [tabular-data-model], yet lacking - any annotation that defines rich semantics for that data, shall be able to be transformed into - an object / object graph serialisation such as JSON, XML or RDF using systematic rules - a "canonical" - mapping.

-

The canonical mapping should provide automatic scoping of local identifiers (e.g. conversion to - URI), identification of primary keys and detection of data types.

-

- Motivation: - CanonicalMappingOfCSV. -

-
Note
-

An annotated table is always generated by applications implementing this specification when processing tabular data; albeit that without supplementary metadata, those annotations are limited (e.g. the titles annotation may be populated from the column headings provided within the tabular data file). Transformations to both RDF and JSON operate on the annotated table, therefore, a canonical transformation is achieved by transforming an annotated table that has not been informed by supplementary metadata.

-
-
-
R-IndependentMetadataPublication
-
- - Ability to publish metadata independently from the tabular data resource it describes - -

Commonly, tabular datasets are published without the supplementary metadata that enables a third party to - correctly interpret the published information. An independent party - in addition to the data publisher - - shall be able to publish metadata about such a dataset, thus enabling a community of users to benefit from - the efforts of that third party to understand that dataset.

-

- Dependency: - R-LinkFromMetadataToData and - R-ZeroEditAdditionOfSupplementaryMetadata. -

-

- Motivation: - MakingSenseOfOtherPeoplesData and - PublicationOfBiodiversityInformation. -

-
Note
-

[tabular-metadata] specifies the format and structure of a metadata file that may be used to provide supplementary annotations on an annotated table or group of tables.

-
-
-
R-SpecificationOfPropertyValuePairForEachRow
-
- - Ability to define a property-value pair for inclusion in each row - -

When annotating tabular data, it should be possible for one to define within the metadata - a property-value pair that is repeated for every row in the tabular dataset; for example, - the location ID for a set of weather observations, or the dataset ID for a set of - biodiversity observations.

-

In the case of sparsely populated data, this property-value pair must be applied as a - default only where that property is absent from the data.

-

As an illustration, the Darwin Core Archive standard - provides the ability to specify such a property value pair within its metadata description - file meta.xml.

- -
Example 50
http://data.gbif.org/download/specimens.csv
--------------------------------------------
-
-ID,Species,Count
-123,"Cryptantha gypsophila Reveal & C.R. Broome",12
-124,"Buxbaumia piperi",2
-
-meta.xml
---------
-
-<archive xmlns="http://rs.tdwg.org/dwc/text/">
-  <core ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/xsd/simpledarwincore/SimpleDarwinRecord">
-    <files>
-      <location>http://data.gbif.org/download/specimens.csv</location>
-    </files>
-    <field index="0" term="http://rs.tdwg.org/dwc/terms/catalogNumber" />
-    <field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName" />
-    <field index="2" term="http://rs.tdwg.org/dwc/terms/individualCount" />
-    <field term="http://rs.tdwg.org/dwc/terms/datasetID" default="urn:lsid:tim.lsid.tdwg.org:collections:1"/>
-  </core>
-</archive>
-

Thus the original tabular data file specimens.csv is interpreted as:

-
Example 51
catalogNumber,scientificName,individualCount,datasetID
-123,"Cryptantha gypsophila Reveal & C.R. Broome",12,urn:lsid:tim.lsid.tdwg.org:collections:1
-124,"Buxbaumia piperi",2,urn:lsid:tim.lsid.tdwg.org:collections:1
- -

- Motivation: - PublicationOfBiodiversityInformation. -

-
Note
-

To meet this requirement a virtual column, as specified in [tabular-data-model], must be specified for the additional property-value pair that is to be included in each row. The default annotation may be used to specify a string value that is used for every empty cell in the associated column. Alternatively, the value URL annotation provides an absolute URL for a given cell. [tabular-metadata] specifies how a URI Template, specified in [RFC6570], may be used to specify the value URL using the valueURL property.

-
-
-
R-ZeroEditAdditionOfSupplementaryMetadata
-
- - Ability to add supplementary metadata to an existing CSV file without - requiring modification of that file - -

It may not be possible for a tabular data file to be modified to include the supplementary - metadata required to adequately describe the content of the data file. For example, the - data may be published by a third party or the user may be constrained in their workflow - by choice of tools that do not support or even recognize the supplementary metadata.

-

It shall be possible to provide annotations about a given tabular data file without - requiring that file to be modified in any way; "zero-edit" addition.

-

- Dependency: - R-LinkFromMetadataToData. -

-

- Motivation: - PublicationOfNationalStatistics, - SurfaceTemperatureDatabank, - MakingSenseOfOtherPeoplesData and - PublicationOfBiodiversityInformation. -

-
Note
-

Please refer to R-CanonicalMappingInLieuOfAnnotation for details of the requirement to transform tabular data lacking any supplementary metadata.

-
-
Note
-

[tabular-metadata] specifies the format and structure of a metadata file that may be used to provide supplementary annotations on an annotated table or group of tables. Through use of such a metadata file, one may provide supplementary annotations without needing to edit the source tabular data file. Applications may use alternative mechanisms to gather annotations on an annotated table or group of tables.

-
-
-
R-LinkFromMetadataToData
-
- - Ability for a metadata description to explicitly cite the tabular dataset it describes - -

Metadata resources may be published independently from the tabular dataset(s) they describe; e.g. a third - party may publish metadata in their own domain that describes how they have interpreted the data for their - application or community. In such a case, the relationship between the metadata and data resources cannot - be inferred - it must be stated explicitly.

-

Such a link between metadata and data resources should be discoverable, thus enabling a data publisher to - determine who is referring to their data leading to the data publisher gaining a better understanding - of their user community.

-

- Motivation: - MakingSenseOfOtherPeoplesData and - PublicationOfBiodiversityInformation. -

-
Note
-

In addition to providing mechanisms to locate metadata relating to a tabular data file (see [tabular-data-model] section 5. Locating Metadata), the url annotation is used to define the URL of the source data for an annotated table; for example, referring to a specific CSV file.

-
-
-
-
-
-

3.1.3 Data model requirements

-
-
R-PrimaryKey
-
- - Ability to determine the primary key for rows within a tabular data file - -

It shall be possible to uniquely identify every row within a tabular data file. The - default behaviour for uniquely identifying rows is to use the row number. However, some - datasets already include a unique identifier for each row in the dataset. In such - cases, it shall be possible to declare which column provides the primary key.

-

- Motivation: - DigitalPreservationOfGovernmentRecords, - OrganogramData, - ChemicalImaging, - PaloAltoTreeData and - ExpressingHierarchyWithinOccupationalListings. -

-
Note
-

The primary key annotation, as specified in [tabular-data-model], may be used to define a primary key. Primary keys may be compiled from multiple values in a given row.

-
-
-
R-ForeignKeyReferences
-
- - Ability to cross reference between CSV files - -

To interpret data in a given row of a CSV file, one may need to be able to refer to - information provided in supplementary CSV files or elsewhere within the same CSV file; - e.g. using a foreign key type reference. The cross-referenced CSV files may, or may - not, be packaged together.

-

- Motivation: - DigitalPreservationOfGovernmentRecords, - OrganogramData, - SurfaceTemperatureDatabank, - RepresentingEntitiesAndFactsExtractedFromText and - SupportingSemantic-basedRecommendations. -

-
Note
-

The foreign keys annotation, as specified in [tabular-data-model], may be used to provide a list of foreign keys for an annotated table. To successfully validate, any cell value in a column referenced by the foreign key statement must have a unique value in the column of the referenced annotated table.

-

As an alternative to the strong validation provided by foreign keys, references or links between rows may be asserted. The target must be identified by URI as is defined using the value URL annotation, as specified in [tabular-data-model]. Where the target is defined in another annotated table, the identity of the subject (or subjects) which the row in that table describes is defined using the about URL annotation for the cells in the target row.

-
-
-
R-AnnotationAndSupplementaryInfo
-
- - Ability to add annotation and supplementary information to CSV file - -

Annotations and supplementary information may be associated with:

-
    -
  • a group of tables
  • -
  • an entire table
  • -
  • a row
  • -
  • a column
  • -
  • an individual cell
  • -
  • range (or region) of cells within a table
  • -
-

Annotations and supplementary information may be literal values or references to a - remote resource. The presence of annotations or supplementary information must not - adversely impact parsing of the tabular data (e.g. the annotations and supplementary - information must be logically separate).

- -
Note
-

This requirement refers to provision of human-readable annotation providing additional context to a group of tables, table, column, row, cell or other region within a table. For example, the publication of national statistics use case adds the following annotations to a table:

-
    -
  • title: Economic activity
  • -
  • dimensions: Economic activity (T016A), 2011 Administrative Hierarchy, 2011 Westminster Parliamentary Constituency Hierarchy
  • -
  • dataset population: All usual residents aged 16 to 74
  • -
  • coverage: England and Wales
  • -
  • area types (list omitted here for brevity)
  • -
  • textual description of dataset
  • -
  • publication information
  • -
  • contact details
  • -
-

This is disjoint from the requirements regarding the provision of supplementary - metadata to describe the content and structure of a tabular data file in a machine - readable form.

-
- -

- Motivation: - PublicationOfNationalStatistics, - SurfaceTemperatureDatabank, - PublicationOfPropertyTransactionData, - AnalyzingScientificSpreadsheets, - ReliabilityAnalyzesOfPoliceOpenData, - OpenSpendingData, - RepresentingEntitiesAndFactsExtractedFromText, - IntelligentlyPreviewingCSVFiles, - CanonicalMappingOfCSV, - SupportingSemantic-basedRecommendations, - MakingSenseOfOtherPeoplesData, - PublicationOfBiodiversityInformation, - ExpressingHierarchyWithinOccupationalListings and - PlatformIntegrationUsingSTDF. -

-
Note
-

Any annotation may be used in addition to the core annotations specified in [tabular-data-model], such as title, author, license etc. [tabular-metadata] section 5.8 Common Properties describes how such 'non-core' annotations are provided in a supplementary metadata file.

-

Any number of additional annotations may be provided for a group of tables or an annotated table; see table-group-notes and table-notes respectively.

-
-
Note

The Web Annotation Working Group is developing a vocabulary for expressing annotations. An example use of the table-notes annotation and the Web Annotation Working Group's open annotation vocabulary is provided in [csv2rdf].

-
-
R-AssociationOfCodeValuesWithExternalDefinitions
-
- - Ability to associate a code value with externally managed definition - -

CSV files make frequent use of code values when describing data. Examples include: - geographic regions, status codes and category codes. In some cases, names are used - as a unique identifier for a resource (e.g. company name within a transaction audit). - It is difficult to interpret the - tabular data without an unambiguous definition of the code values or (local) identifiers used.

-

It must be possible to unambiguously associate the notation used within a CSV file - with the appropriate external definition.

-

- Dependency: - URIMapping. -

-

- Motivation: - PublicationOfNationalStatistics, - PublicationOfPropertyTransactionData, - SurfaceTemperatureDatabank, - OpenSpendingData, - RepresentingEntitiesAndFactsExtractedFromText, - IntelligentlyPreviewingCSVFiles, - SupportingSemantic-basedRecommendations, - MakingSenseOfOtherPeoplesData, - PublicationOfBiodiversityInformation and - CollatingHumanitarianResponseInformation. -

-
Note
-

Code values expressed within a cell can be associated with external definitions in two ways:

-
    -
  1. The valueURL property, as defined in [tabular-metadata], may be used to provide a URI Template that converts the code value to a URI, thus explicitly identifying the associated external definition. URI Templates are defined in [RFC6570].
  2. -
  3. The foreignKeys property, as defined in [tabular-metadata], may be used to provide a foreign key definition that relates the values in a column of the annotated table to those in a column of another annotated table. The definition of the code value could be provided in the table referenced via the foreign key.
  4. -
-
-
-
R-SyntacticTypeDefinition
-
- - Ability to declare syntactic type for cells within a specified column. - -

Whilst it is possible to automatically detect the type of data (e.g. date, number) in a - given cell, this can be error prone. For example, the date April 1st if written - as 1/4 may be interpreted as a decimal fraction.

-

It shall be possible to declare the data type for the cells in a given column of a - tabular data file. Only one data type can be declared for a given column.

-
Note
-

An application may still attempt to automatically detect the data type for a given - cell. However, the explicit declaration shall always take precedence.

-
-
Note
-

The data type declaration will typically be used to declare that a column contains - integers, floating point numbers or text. However, it may be used to assert that a cell - contains, say, embedded - XML content (rdf:XMLLiteral), - datetime values (xsd:dateTime) or - geometry expressed as well-known-text (geo:wktLiteral, - GeoSPARQL [geosparql] section 8.5.1 RDFS Datatypes refers).

-
-

- Motivation: - SurfaceTemperatureDatabank, - DigitalPreservationOfGovernmentRecords, - ReliabilityAnalyzesOfPoliceOpenData, - AnalyzingScientificSpreadsheets, - RepresentingEntitiesAndFactsExtractedFromText, - DisplayingLocationsOfCareHomesOnAMap, - IntelligentlyPreviewingCSVFiles, - CanonicalMappingOfCSV, - SupportingSemantic-basedRecommendations, - PublicationOfBiodiversityInformation, - PlatformIntegrationUsingSTDF and - ConsistentPublicationOfLocalAuthorityData. -

-
Note
-

The syntactic type for a cell value is defined using the datatype annotation. [tabular-data-model] section 4.6 Datatypes lists the built-in datatypes used in this specification; including those defined in [xmlschema11-2] plus number, binary, datetime, any, html, and json. Datatypes can be derived from the built-in datatypes using further annotations; [tabular-metadata] section 5.11.2 Derived Datatypes specifies how to describe derived datatypes within a metadata file.

-
-
-
R-SemanticTypeDefinition
-
- - Ability to declare semantic type for cells within a specified column. - -

Each row in a tabular data set describes a given resource or entity. The properties - for that entity are described in the cells of that row. All the cells in a given column - are anticipated to provide the same property.

-

It shall be possible to declare the semantic relationship between the entity that a - given row describes and a cell in a given column.

-

The following example of an occupational listing - illustrates how a row of tabular data can be mapped to equivalent content expressed in RDF (Turtle).

-

The mappings are:

- -
Example 52
CSV
----
-
-O*NET-SOC 2010 Code,O*NET-SOC 2010 Title,O*NET-SOC 2010 Description
-         11-1011.00,    Chief Executives,"Determine and formulate policies and provide overall direction of companies [...]."
-{snip}
-
-RDF (Turtle)
-------------
-
-ex:11-1011.00
-    skos:notation "11-1011.00" ;
-    rdfs:label "Chief Executives" ;
-    dc:description "Determine and formulate policies and provide overall direction of companies [...]." .
- -

A copy of the occupational listing CSV is available locally.

- -
Note
-

To express semantics in a machine readable form, RDF seems the appropriate choice. - Furthermore, best practice indicates that one should adopt common and widely adopted - patterns (e.g. RDF vocabularies, OWL ontologies) when publishing data to enable a - wide audience to consume and understand the data. Existing (de facto) standard - patterns may add complexity when defining the semantics associated with a particular - row such that a single RDF predicate is insufficient.

-

For example, to express a quantity value using QUDT - we use an instance of qudt:QuantityValue to relate the numerical value - with the quantity kind (e.g. air temperature) and unit of measurement (e.g. - Celsius). Thus the semantics needed for a column containing temperature values might - be: qudt:value/qudt:numericValue – more akin to a LDPath.

-

Furthermore, use of OWL axioms when defining a sub-property of - qudt:value would allow the quantity type and unit of measurement to - be inferred, with the column semantics then being specified as - ex:temperature_Cel/qudt:numericValue.

-
-

- Motivation: - DigitalPreservationOfGovernmentRecords, - PublicationOfNationalStatistics, - SurfaceTemperatureDatabank, - ReliabilityAnalyzesOfPoliceOpenData, - AnalyzingScientificSpreadsheets, - RepresentingEntitiesAndFactsExtractedFromText, - IntelligentlyPreviewingCSVFiles, - SupportingSemantic-basedRecommendations, - MakingSenseOfOtherPeoplesData, - PublicationOfBiodiversityInformation and - CollatingHumanitarianResponseInformation. -

-
Note
-

The property URL annotation provides the URI for the property relating the value of a given cell to its subject. [tabular-metadata] specifies how a URI Template, specified in [RFC6570], may be used to specify the property URL using the propertyURL property. This property is normally specified for the column and inherited by all the cells within that column.

-
-
-
R-MissingValueDefinition
-
- - Ability to declare a "missing value" token and, optionally, a reason for the - value to be missing - -

Significant amounts of existing tabular text data include values such as - -999. Typically, these are outside the normal expected range of values - and are meant to infer that the value for that cell is missing. Automated parsing of - CSV files needs to recognise such missing value tokens and behave accordingly. - Furthermore, it is often useful for a data publisher to declare why a value - is missing; e.g. withheld or aboveMeasurementRange

-

- Motivation: - SurfaceTemperatureDatabank, - OrganogramData, - OpenSpendingData, - NetCdFcDl, - PaloAltoTreeData and - PlatformIntegrationUsingSTDF. -

-
Note
-

[tabular-data-model] defines the null annotation which defines the string or strings that, when matched to the literal content of a cell, cause the cell's value to be interpreted as null (or empty).

-
-
-
R-URIMapping
-
- - Ability to map cell values within a given column into corresponding URI - -

Tabular data often makes use of local identifiers to uniquely identify an entity described within a tabular data file or to reference an entity described in the same data file or elsewhere (e.g. reference data, code lists, etc.). The local identifier will often be unique within a particular scope (e.g. a code list or data set), but cannot be guaranteed to be globally unique. In order to make these local identifiers globally unique (e.g. so that - the entity described by a row in a tabular data file can be referred to from an external source, or to establish links between the tabular data and the related reference data) it is necessary to map those local identifiers to URIs.

-

It shall be possible to declare how local identifiers used within a column of a particular dataset can be mapped to their respective URI. Typically, this may be achieved by concatenating the local identifier with a prefix - although more complex mappings are anticipated such as removal of "special characters" that are not permitted in URIs (as defined in [RFC3986]) or CURIEs [curie].

-

Furthermore, where the local identifier is part of a controlled vocabulary, code list or thesaurus, it should be possible to specify the URI for the controlled vocabulary within which the local identifier is defined.

-
Note

Also see the related requirement R-ForeignKeyReferences.

-

- Motivation: - DigitalPreservationOfGovernmentRecords, - OrganogramData, - PublicationOfPropertyTransactionData, - AnalyzingScientificSpreadsheets, - RepresentingEntitiesAndFactsExtractedFromText, - PaloAltoTreeData, - PublicationOfBiodiversityInformation, - MakingSenseOfOtherPeoplesData and - ExpressingHierarchyWithinOccupationalListings. -

-
Note
-

The valueURL property from [tabular-metadata] specifies how a URI Template, as defined in [RFC6570], may be used to map literal contents of a cell to a URI. The result of evaluating the URI Template is stored in the value URL annotation for each cell.

-
-
-
R-UnitMeasureDefinition
-
- - Ability to identify/express the unit of measure for the values reported in a given column. - -

Data from measurements is often published and exchanged as tabular data. In order for the values of those measurements to be correctly understood, it is essential that the unit of measurement associated with the values can be specified. For example, without specifying the unit of measurement as kilometers, the floating point value 21.5 in a column entitled distance is largely meaningless.

-

- Motivation: - AnalyzingScientificSpreadsheets, - OpenSpendingData, - IntelligentlyPreviewingCSVFiles, - ChemicalImaging, - ChemicalStructures, - NetCdFcDl and - PaloAltoTreeData -

-
Note
-

This specification provides no native mechanisms for expressing the unit of measurement associated with values of cells in a column.

-

However, annotations may be used to provide this additional information. The [tabular-data-primer] provides examples of how this might be achieved; from providing descriptive metadata for the column, to enabling transformation of cell values to structured data with unit of measurement properties.

-

Also note that the [vocab-data-cube] provides another alternative for annotations; structural metadata is used to provide the metadata required to interpret data values - such as the unit of measurement.

-
-
-
R-GroupingOfMultipleTables
-
- - Ability to group multiple data tables into a single package for - publication - -

When publishing sets of related data tables, it shall be possible to provide annotation for the - group of related tables. Annotation concerning a group of tables may include summary - information about the composite dataset (or "group") that the individual tabular datasets belong to, - such as the license under which the dataset is made available.

-

The implication is that the group shall be identified as an entity - in its own right, thus enabling assertions to be made about that group. The relationship - between the group and the associated tabular datasets will need to be made explicit.

-

Furthermore, where appropriate, it shall be possible to describe the interrelationships - between the tabular datasets within the group.

-

The tabular datasets comprising a group need not be hosted at the same URL. As such, - a group does not necessarily need to be published as a single package (e.g. as a zip) - although we - note that this is a common method of publication.

-

- Motivation: - PublicationOfNationalStatistics, - OrganogramData, - ChemicalStructures and - NetCdFcDl. -

-
Note
-

The group of tables, as defined in [tabular-data-model] is a first class entity within the tabular data model. A group of tables comprises a set of annotated tables and a set of annotations that relate to that group of tables.

-
-
-
R-MultilingualContent
-
- - Ability to declare a locale / language for content in a specified column - -

Tabular data may contain literal values for a given property in multiple languages. For example, the name of a town in English, French and Arabic. It shall be possible to:

-
    -
  • specify the property for which the literal values are supplied; and
  • -
  • specify the language / locale relevant to all data values in a given column.
  • -
-

Additionally, it should be possible to provide supplementary labels for column headings in multiple languages.

-

- Motivation: - CollatingHumanitarianResponseInformation. -

-
Note
-

The lang annotation, as defined in [tabular-data-model], may be used to express the code for the expected language for values of cells in a particular column. The language code is expressed in the format defined by [BCP47].

-

Furthermore, the titles annotation allows for any number of human-readable titles to be given for a column, each of which may have an associated language code as defined by [BCP47].

-
-
-
R-RepeatedProperties
-
- - Ability to provide multiple values of a given property for a single entity described within a tabular data file - -

It is commonplace for a tabular data file to provide multiple values of a given property - for a single entity. This may be achieved in a number of ways.

-

First, multiple rows may be used to describe the same entity; each such row using the same - unique identifier for the entity. For example, a country, identified using its - two-letter country code, - may have more than one name:

-
Example 53
CSV:
-----
-
-country,name
-AD,     Andorra
-AD,     Principality of Andorra
-AF,     Afghanistan
-AF,     Islamic Republic of Afghanistan
-{snip}
-
-Equivalent JSON:
-----------------
-
-[{
-  "country": "AD",
-  "name": [ "Andorra", "Principality of Andorra" ]
-},{
-  "country": "AF",
-  "name": [ "Afghanistan", "Islamic Republic of Afghanistan" ]
-}]
- -

Second, a single row within a tabular data set may contain multiple values for a given property - by declaring that multiple columns map to the same property. For example, multiple locations:

- - -
Example 54
CSV:
-----
-
-geocode #1,geocode #2,geocode #3
-    020503,          ,
-    060107,    060108,
-    173219,          ,
-    530012,    530013,    530015
-    279333,          ,
-
-Equivalent RDF (in Turtle syntax):
-----------------------------------
-
-row:1 admingeo:gssCode ex:020503 .
-row:2 admingeo:gssCode ex:060107, ex:060108 .
-row:3 admingeo:gssCode ex:173219 .
-row:4 admingeo:gssCode ex:530012, ex:530013, ex:530015 .
-row:5 admingeo:gssCode ex:279333 .
- -
Note
-

In this case, it is essential to declare that each of the columns refers to the same property. In the - example above, all the geocode columns map to - admingeo:gssCode.

-
- -

Finally, microsyntax may provide a list of values within a single - cell. For example, a semi-colon ";" delimited list of comments about the characteristics - of a tree within a municipal database:

- -
Example 55
CSV:
-----
-
-GID,Tree ID, On Street,From Street,To Street,             Species,[...],Comments
-  6,     34,ADDISON AV, EMERSON ST,RAMONA ST,Robinia pseudoacacia,[...],cavity or decay; trunk decay; codominant leaders; included bark; large leader or limb decay; previous failure root damage; root decay;  beware of BEES.
-{snip}
-
-Equivalent JSON:
-----------------
-
-[{
-  "GID": "6",
-  "Tree_ID": "34",
-  "On_Street": "ADDISON AV",
-  "From_Street": "EMERSON ST",
-  "To_Street": "RAMONA ST",
-  "Species": "Robinia pseudoacacia",
-  "Comments": [ "cavity or decay", "trunk decay", "codominant leaders", "included bark", "large leader or limb decay", "previous failure root damage", "root decay", "beware of BEES."]
-}]
- -

Note that the example above is based on the Palo Alto tree data use case; albeit truncated for clarity.

- -
Note

In writing this requirement, no assumption has been made regarding how the repeated values should be implemented in RDF, JSON or XML.

- -

Motivation: - JournalArticleSearch, - PaloAltoTreeData, - SupportingSemantic-basedRecommendations and - CollatingHumanitarianResponseInformation. -

- -
Note
-

Within an annotated table, the values of cells can be considered as RDF subject-predicate-object triples (see [rdf11-concepts]). The about URL annotation may be used to define the subject of the triple derived from a cell, and, where the same about URL annotation is used for every cell within a row, the resource identified by the about URL annotation can be considered to be the subject of the row.

-

The same about URL annotation can be used to describe cells in more than one row, thus enabling information about a single subject to be spread across multiple rows.

-

Similarly, the property URL annotation may be used to define the predicate of the triple derived from a cell. The same property URL annotation may be used for multiple columns, meaning that multiple values of a single property can be provided across multiple columns.

-

Finally, note that arrays of values may be provided by a single cell. Please refer to requirement R-CellMicrosyntax for further details.

-
-
-
-
-
-
-

3.2 Partially accepted requirements

-
-

3.2.1 Data model requirements

-
-
R-CellMicrosyntax
-
- - Ability to parse internal data structure within a cell value - -

Cell values may represent more complex data structures for a given column such as lists and time stamps. - The presence of complex data structures within a given cell is referred to as microsyntax.

-

If present, parsers should have the option of handling the microsyntax or ignoring it and treating it as - a scalar value. -

-

Looking in further detail at the uses of microsyntax, four types of usage are prevalent:

-
    -
  1. various date/time syntaxes (not just ISO-8601 ones)
  2. -
  3. delimited lists of literal values to express multiple values of the same property - (typically comma "," delimited, but other delimiters are also used)
  4. -
  5. embedded structured data such as XML, JSON or - well-known-text (WKT) literals
  6. -
  7. semi structured text
  8. -
-

The following requirements pertain to describing and parsing microsyntax:

-
    -
  • to document microsyntax so that humans can understand - what it is conveying; e.g. to provide human-readable annotation
  • -
  • to validate the cell values to ensure they conform to the expected microsyntax
  • -
  • to label the value as being in a particular microsyntax when converting into - JSON/XML/RDF; e.g. marking an XML value as an XMLLiteral or a datetime - value as xsd:dateTime
  • -
  • to process the microsyntax into an appropriate data structure when converting - into JSON/XML/RDF
  • -
-

The ability to declare that a column within a tabular data file carries values of a - particular type, and the potential validation of the cell against the declared type, - is covered in R-SyntacticTypeDefinition - and is not discussed further here.

- -

We can consider cell values with microsyntax to be annotated strings. The annotation (which might - include a definition of the format of the string - such as defining the delimiter used for a list) - can be used to validate the string and (in some cases) convert it into a suitable value or data - structure.

-

Microsyntax, therefore, requires manipulation of the text if processed. Typically, this will - relate to conversion of lists into multiple-valued entries, but may also include reformatting of - text to convert between formats (e.g. to convert a datetime value to a date, or locale dates to - ISO 8601 compliant syntax).

- -

Motivation: - JournalArticleSearch, - PaloAltoTreeData, - SupportingSemantic-basedRecommendations, - ExpressingHierarchyWithinOccupationalListings and - PlatformIntegrationUsingSTDF. -

- -
Note
-

This specification indicates how applications should provide support for validating the format, or syntax, of the literal content provided in cells. [tabular-data-model] section 6.4 Parsing Cells describes validation of formats for numeric datatypes, boolean, dates, times, and durations.

-

Please refer to R-SyntacticTypeDefinition for details of the associated requirement.

-

A regular expression, with syntax and processing as defined in [ECMASCRIPT], may be used to validate the format of a string value. In this way, the syntax of embedded structured data (e.g. html, json, xml and well known text literals) can be validated.

-

However, support for the extraction of values from structured data is limited to parsing the cell content to extract an array of values. Parsers must use the value of the separator annotation, as specified in [tabular-data-model], to split the literal content of the cell. All values within the array are considered to be of the same datatype.

-

This functionality meets the needs of 4 out of 5 motivating requirements:

-
    -
  • JournalArticleSearch: date-time formats dealt with as a native datatype and the list of authors is treated as an array. The journal title does contain html markup (e.g. the <i> html element) but the use case indicates that it is acceptable to treat this as literal text.
  • -
  • PaloAltoTreeData: list of comments delimited with semi-colon (";") are mapped to an array of values.
  • -
  • SupportingSemantic-basedRecommendations: the 'semantic paths' are a comma-delimited list of URIs which are mapped to an array of values. The use case does not indicate that different semantics need to be applied to each value in the array.
  • -
  • PlatformIntegrationUsingSTDF: escape sequences for 'special characters' are not supported, but the use case indicates that "these special characters don't affect the parsing" so are considered not to be a microsyntax from which separate data values are to be extracted.
  • -
-

This specification does not natively meet the requirement to extract values from other structured data formats; the Working Group deemed this to add significant complexity to both specification and conforming applications.

-

That said, an annotated table may specify transformations which define a list of specifications for converting the associated annotated table into other formats using a script or template such as Mustache. These scripts or templates may be used to extract values from structured data, operating on the annotated table itself, the RDF graph provided from transforming the annotated table into RDF using standard mode (as specified in [csv2rdf]), or the JSON provided when using the standard mode specified in [csv2json]. Transformation specifications are defined in [tabular-metadata] section 5.10 Transformation Definitions.

-

Use case ExpressingHierarchyWithinOccupationalListings requires the extraction of values from substrings within cell values (e.g. different parts of the structured occupation code). Such processing may be achievable using scripts or templates which can be specified using a transformation definition.

-
-
-
R-CsvAsSubsetOfLargerDataset
-
- - Ability to assert how a single CSV file is a facet or subset of a larger - dataset - -

A large tabular dataset may be split into several files for publication; perhaps to ensure that - each file is a manageable size or to publish the updates to a dataset during the (re-)publishing cycle. It shall - be possible to declare that each of the files is part of the larger dataset and to describe what - content can be found within each file in order to allow users to rapidly find the particular file - containing the information they are interested in.

-

- Motivation: - SurfaceTemperatureDatabank, - PublicationOfPropertyTransactionData, - JournalArticleSearch, - ChemicalImaging and - NetCdFcDl. -

-
Note
-

This specification provides only a simple grouping mechanism to relate annotated tables, as described in [tabular-data-model] section 4.1 Table groups. Large tabular datasets may be subdivided into smaller parts for easier management. Each of the smaller parts may be related to each other using a group of tables.

-

However, no mechanism is provided for describing the relationship between tables other than simple grouping. Other specifications, such as [vocab-data-cube] and [void], provide mechanisms to describe subsets of data that can be used to meet this requirement. Such descriptions can be included as metadata annotations in the form of notes.

-
-
-
-
-
-
-

3.3 Deferred requirements

-
-

3.3.1 CSV parsing requirements

-
-
R-WellFormedCsvCheck
-
- - Ability to determine that a CSV is syntactically well formed - -

In order to automate the parsing of information published in CSV form, it is - essential that that content be well-formed with respect to the - syntax for tabular data [tabular-data-model].

-

- Motivation: - DigitalPreservationOfGovernmentRecords, - OrganogramData, - ChemicalImaging, - ChemicalStructures, - NetCdFcDl, - PaloAltoTreeData, - CanonicalMappingOfCSV, - IntelligentlyPreviewingCSVFiles, - MakingSenseOfOtherPeoplesData and - ConsistentPublicationOfLocalAuthorityData. -

-
Note

- This requirement has been deferred as normative specification for parsing CSV is outside the scope of the Working Group charter. [tabular-data-model] does provide non-normative definition of parsing of CSV files, including flexibility to parse tabular data that does not use commas as separators. -

-
-
R-MultipleHeadingRows
-
- - Ability to handle headings spread across multiple initial rows, as well as to distinguish between single column headings and file headings. - -

Row headings should be distinguished from file headings (if present). Also, in case subheadings are present, it should be possible to define their coverage - (i.e. how many columns they refer to).

-

- Motivation: - PublicationOfNationalStatistics, - AnalyzingScientificSpreadsheets, - IntelligentlyPreviewingCSVFiles, - CollatingHumanitarianResponseInformation, - ExpressingHierarchyWithinOccupationalListings and - PlatformIntegrationUsingSTDF. -

-
Note

- The Working Group decided to rule headings spanning multiple columns out of scope. However, it is possible to skip initial rows that do not contain header information using skipRows and to specify that a table contains multiple header rows using headerRowCount when describing a dialect, as described in [tabular-metadata]. -

-
-
R-TableNormalization
-
- - Ability to transform data that is published in a normalized form into tabular data. - -

Textual data may be published in a normalized form; often improving human readability by reducing the number of lines in the data file. As a result, such a normalized data file will no longer be regular as additional information is included in each row (e.g., the number of columns will vary because more cells are provided for some rows).

-
Note
-

Use of the term normalized is meant in a general sense, rather than the specific meaning relevant to relational databases.

-
-

Such a normalized data file must be transformed into a tabular data file, as defined by the model for tabular data [tabular-data-model], prior to applying any further transformation.

-

- Motivation: - RepresentingEntitiesAndFactsExtractedFromText. -

-
Note
-

The motivating use case is an example where we have a CSV file that is not well-formed - in this particular case, the number of columns varies row by row and therefore does not conform to the model for tabular data [tabular-data-model].

-

The ability to transform a data file into a tabular data file is a necessary prerequisite for any subsequent transformation. That said, such a transformation is outside the scope of this Working Group as it requires parsing a data file with any structure.

-

Such pre-processing to create a tabular data file from a given structure is - likely to be reasonably simple for a programmer to implement, but it cannot be generalised.

-
-
-
-
-
-

3.3.2 Applications requirements

-
-
R-RandomAccess
-
- - Ability to access and/or extract part of a CSV file in a non-sequential manner. - -

Large datasets may be hard to process in a sequential manner. It may be useful to have the possibility to directly access part of them, possibly by means of a pointer to a given row, cell or region.

-

- Motivation: - SupportingSemantic-basedRecommendations. -

-
Note

- A standardised mechanism for querying tabular data is outside the scope of the Working Group. However, it is possible to use fragment identifiers as defined in [RFC7111] to identify columns, rows, cells, and regions of CSV files, and sufficient information is kept in the tabular data model to ensure that this ability is retained. -

-
-
R-CsvToXmlTransformation
-
- - Ability to transform a CSV into XML - -

Standardised CSV to XML transformation mechanisms mitigate the need for bespoke transformation software to be developed by CSV data consumers, - thus simplifying the exploitation of CSV data.

-

- Motivation: - DigitalPreservationOfGovernmentRecords. -

-
Note
-

Although the charter of the Working Group includes a work item for CSV to XML conversion, this requirement has unfortunately been deferred. The Working Group was unable to find XML experts to assist in delivery of this work item. The lack of available effort combined with motivation for this requirement being provided by a single use case only meant that the Working Group was forced to abandon this deliverable.

-
-
-
R-ConditionalProcessingBasedOnCellValues
-
- - Ability to apply conditional processing based on the value of a specific cell - -

When transforming CSV content into XML, JSON or RDF it shall be possible to vary the transformation of the information in a particular row based on the values within a cell, or element within a cell, contained within that row.

-

To vary the transformation based on an element within a cell, the value of that cell must be well structured. See CellMicrosyntax for more information.

-

- Motivation: - ExpressingHierarchyWithinOccupationalListings. -

-
Note
-

The ability to control the processing of tabular data based on values in a particular cell is not natively supported by this specification. Following detailed analysis, the Working Group concluded that such functionality would add significant complexity to the specification and implementing applications. However, an annotated table may specify transformations which define a list of specifications for converting the associated annotated table into other formats using a script or template such as Mustache. These scripts or templates may be used to provide conditional processing, operating on the annotated table itself, the RDF graph provided from transforming the annotated table into RDF using standard mode (as specified in [csv2rdf]), or the JSON provided when using the standard mode specified in [csv2json]. Transformation specifications are defined in [tabular-metadata] section 5.10 Transformation Definitions.

-
-
-
-
-
-
- -
-

A. Acknowledgements

-
At the time of publication, the following individuals had participated in the Working Group, in the order of their first name: - Adam Retter, - Alf Eaton, - Anastasia Dimou, - Andy Seaborne, - Axel Polleres, - Christopher Gutteridge, - Dan Brickley, - Davide Ceolin, - Eric Stephan, - Erik Mannens, - Gregg Kellogg, - Ivan Herman, - Jeni Tennison, - Jeremy Tandy, - Jürgen Umbrich, - Rufus Pollock, - Stasinos Konstantopoulos, - William Ingram, and - Yakov Shafranovich. -
-
- -
-

B. Changes since previous versions

-
-

B.1 Changes since working draft of 01 July 2014

- -
-
-

B.2 Changes since first public working draft of 27 March 2014

- -
+
+ [CONTENT]
- -

C. References

C.1 Normative references

[BCP47]
A. Phillips; M. Davis. Tags for Identifying Languages. September 2009. IETF Best Current Practice. URL: https://tools.ietf.org/html/bcp47 -
[csv2json]
Jeremy Tandy; Ivan Herman. Generating JSON from Tabular Data on the Web. 17 December 2015. W3C Recommendation. URL: http://www.w3.org/TR/csv2json/ -
[csv2rdf]
Jeremy Tandy; Ivan Herman; Gregg Kellogg. Generating RDF from Tabular Data on the Web. 17 December 2015. W3C Recommendation. URL: http://www.w3.org/TR/csv2rdf/ -
[tabular-data-model]
Jeni Tennison; Gregg Kellogg. Model for Tabular Data and Metadata on the Web. 17 December 2015. W3C Recommendation. URL: http://www.w3.org/TR/tabular-data-model/ -
[tabular-metadata]
Jeni Tennison; Gregg Kellogg. Metadata Vocabulary for Tabular Data. 17 December 2015. W3C Recommendation. URL: http://www.w3.org/TR/tabular-metadata/ -

C.2 Informative references

[ECMASCRIPT]
ECMAScript Language Specification. URL: https://tc39.github.io/ecma262/ -
[RFC3986]
T. Berners-Lee; R. Fielding; L. Masinter. Uniform Resource Identifier (URI): Generic Syntax. January 2005. Internet Standard. URL: https://tools.ietf.org/html/rfc3986 -
[RFC4180]
Y. Shafranovich. Common Format and MIME Type for Comma-Separated Values (CSV) Files. October 2005. Informational. URL: https://tools.ietf.org/html/rfc4180 -
[RFC6570]
J. Gregorio; R. Fielding; M. Hadley; M. Nottingham; D. Orchard. URI Template. March 2012. Proposed Standard. URL: https://tools.ietf.org/html/rfc6570 -
[RFC7111]
M. Hausenblas; E. Wilde; J. Tennison. URI Fragment Identifiers for the text/csv Media Type. January 2014. Informational. URL: https://tools.ietf.org/html/rfc7111 -
[RFC7159]
T. Bray, Ed.. The JavaScript Object Notation (JSON) Data Interchange Format. March 2014. Proposed Standard. URL: https://tools.ietf.org/html/rfc7159 -
[curie]
Mark Birbeck; Shane McCarron. CURIE Syntax 1.0. 16 December 2010. W3C Note. URL: http://www.w3.org/TR/curie -
[geosparql]
OGC GeoSPARQL - A Geographic Query Language for RDF Data. OpenGIS Implementation Specification. URL: https://portal.opengeospatial.org/files/?artifact_id=47664 -
[json-ld]
Manu Sporny; Gregg Kellogg; Markus Lanthaler. JSON-LD 1.0. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/json-ld/ -
[rdf11-concepts]
Richard Cyganiak; David Wood; Markus Lanthaler. RDF 1.1 Concepts and Abstract Syntax. 25 February 2014. W3C Recommendation. URL: http://www.w3.org/TR/rdf11-concepts/ -
[tabular-data-primer]
Jeni Tennison. CSV on the Web: A Primer. W3C Note. URL: http://www.w3.org/TR/2016/NOTE-tabular-data-primer-20160225/ -
[turtle]
Eric Prud'hommeaux; Gavin Carothers. RDF 1.1 Turtle. 25 February 2014. W3C Recommendation. URL: http://www.w3.org/TR/turtle/ -
[vocab-data-cube]
Richard Cyganiak; Dave Reynolds. The RDF Data Cube Vocabulary. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/vocab-data-cube/ -
[void]
Keith Alexander; Richard Cyganiak; Michael Hausenblas; Jun Zhao. Describing Linked Datasets with the VoID Vocabulary. 3 March 2011. W3C Note. URL: http://www.w3.org/TR/void/ -
[xml]
Tim Bray; Jean Paoli; Michael Sperberg-McQueen; Eve Maler; François Yergeau et al. Extensible Markup Language (XML) 1.0 (Fifth Edition). 26 November 2008. W3C Recommendation. URL: http://www.w3.org/TR/xml -
[xmlschema11-2]
David Peterson; Sandy Gao; Ashok Malhotra; Michael Sperberg-McQueen; Henry Thompson; Paul V. Biron et al. W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes. 5 April 2012. W3C Recommendation. URL: http://www.w3.org/TR/xmlschema11-2/ -
\ No newline at end of file + \ No newline at end of file diff --git a/test/docs/metadata/mediacapture-depth.html b/test/docs/metadata/mediacapture-depth.html index fac5bef61..94e4a3f17 100644 --- a/test/docs/metadata/mediacapture-depth.html +++ b/test/docs/metadata/mediacapture-depth.html @@ -803,658 +803,9 @@

A. Acknowledgements
  • B. References
  • - -
    -

    1. - Introduction -

    -

    - Depth cameras are increasingly being integrated into devices such as - phones, tablets, and laptops. Depth cameras provide a depth map, - which conveys the distance information between points on an object's - surface and the camera. With depth information, web content and - applications can be enhanced by, for example, the use of hand gestures - as an input mechanism, or by creating 3D models of real-world objects - that can interact and integrate with the web platform. Concrete - applications of this technology include more immersive gaming - experiences, more accessible 3D video conferences, and augmented - reality, to name a few. -

    -

    - To bring depth capability to the web platform, this specification - extends - the MediaStream interface [GETUSERMEDIA] to - enable it to also contain depth-based - MediaStreamTracks. A depth-based - MediaStreamTrack, referred to as a depth stream - track, represents an abstraction of a stream of frames that can - each be converted to objects which contain an array of pixel data, - where each pixel represents the distance between the camera and the - objects in the scene for that point in the array. A - MediaStream object that contains one or more - depth stream tracks is referred to as a depth-only stream - or depth+video stream. -

    -

    - Depth cameras usually produce 16-bit depth values per pixel. However, - neither the canvas drawing surface used to draw and manipulate 2D - graphics on the web platform nor the ImageData - interface used to represent image data supports 16 bits per pixel. To - address the issue, this specification defines a conversion into an 8-bit - grayscale representation of a depth map for consumption by APIs - that are limited to 8 bits per pixel. -

    -

    - The Media Capture Stream with Worker specification - [MEDIACAPTURE-WORKER] that complements this specification enables - processing of 16-bit depth values per pixel directly in a worker - environment and makes the <video> and - <canvas> indirection and depth-to-grayscale - conversion redundant. This alternative pipeline that supports greater - bit depth and does not incur the performance penalty of the indirection - and conversion enables more advanced use cases. -

    -
    -
    -

    2. - Use cases and requirements -

    -

    - This specification attempts to address the Use - Cases and Requirements for accessing depth stream from a depth - camera. See also the - Examples section for concrete usage examples. -

    -
    -

    3. Conformance

    -

    - As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, - and notes in this specification are non-normative. Everything else in this specification is - normative. -

    -

    The key words MUST and MUST NOT are - to be interpreted as described in [RFC2119]. -

    -

    - This specification defines conformance criteria that apply to a single - product: the user agent that implements the interfaces that - it contains. -

    -

    - Implementations that use ECMAScript to implement the APIs defined in - this specification must implement them in a manner consistent with the - ECMAScript Bindings defined in the Web IDL specification [WEBIDL], - as this specification uses that specification and terminology. -

    -
    -
    -

    4. - Dependencies -

    -

    - The - MediaStreamTrack and - MediaStream interfaces this specification extends - are defined in [GETUSERMEDIA]. -

    -

    - The - Constraints, - MediaStreamConstraints, - MediaTrackSettings, - MediaTrackConstraints, - MediaTrackSupportedConstraints, - MediaTrackCapabilities, and - MediaTrackConstraintSet dictionaries this - specification extends are defined in [GETUSERMEDIA]. -

    -

    - The - getUserMedia(), applyConstraints(), - getSettings() - methods and the - NavigatorUserMediaSuccessCallback callback are - defined in [GETUSERMEDIA]. -

    -

    - The concepts muted - and disabled - as applied to MediaStreamTrack are defined in [GETUSERMEDIA]. -

    -

    - The terms source - and consumer - are defined in [GETUSERMEDIA]. -

    -

    - The SourceTypeEnum - and - MediaDeviceKind enumerations are defined in - [GETUSERMEDIA]. -

    -

    - The ImageData - and VideoTrack - interfaces are defined in [HTML]. -

    -
    -
    -

    5. - Terminology -

    -

    - The term depth+video stream means a MediaStream - object that contains one or more MediaStreamTrack objects of - kind "depth" (depth stream track) and one or more - MediaStreamTrack objects of kind "video" (video - stream track). -

    -

    - The term depth-only stream means a MediaStream object - that contains one or more MediaStreamTrack objects of kind - "depth" (depth stream track) only. -

    -

    - The term video-only stream means a MediaStream object - that contains one or more MediaStreamTrack objects of kind - "video" (video stream track) only, and optionally - of kind "audio". -

    -

    - The term depth stream track means a MediaStreamTrack - object whose kind is "depth". It represents a media stream - track whose source is a depth camera. -

    -

    - The term video stream track means a MediaStreamTrack - object whose kind is "video". It represents a media stream - track whose source is a video camera. -

    -
    -

    5.1 - Depth map -

    -

    - A depth map is an abstract representation of a frame of a - depth stream track. A depth map is an image that - contains information relating to the distance of the surfaces of - scene objects from a viewpoint. -

    -

    - A depth map has an associated focal length which is - a double. It represents the focal length of the camera in - millimeters. -

    -

    - A depth map has an associated horizontal field of - view which is a double. It represents the horizontal angle of - view in degrees. -

    -

    - A depth map has an associated vertical field of - view which is a double. It represents the vertical angle of - view in degrees. -

    -

    - A depth map has an associated unit which is a - string. It represents the active depth map unit. -

    -

    - A depth map has an associated near value which is a - double. It represents the minimum range in active depth map - units. -

    -

    - A depth map has an associated far value which is a - double. It represents the maximum range in active depth map - units. -

    -
    -
    -
    -

    6. - Extensions -

    -
    -

    6.1 - MediaStreamConstraints dictionary -

    -
    partial dictionary MediaStreamConstraints {
    -    (boolean or MediaTrackConstraints) depth = false;
    -};
    -

    - If the depth dictionary member has the value - true, the MediaStream returned by the getUserMedia() - method MUST contain a depth stream track. If the depth - dictionary member is set to false, is not provided, or is set to - null, the MediaStream MUST NOT contain a depth stream - track. -

    -

    - If active depth map unit is provided in - MediaTrackConstraints, let that unit be the active depth - map unit for the returned depth stream track. -

    -
    Note
    - If the user agent requests a combined depth+video stream, the - devices in the constraint should be satisfied as belonging to the - same group or physical device. The decision to select and satisfy - which device pair is left up to the implementation. -
    -
    -
    -

    6.2 - MediaStream interface -

    -
    partial interface MediaStream {
    -    sequence<MediaStreamTrack> getDepthTracks();
    -};
    -

    - The getDepthTracks() method, when invoked, - MUST return a sequence of depth - stream tracks in this stream. -

    -

    - The getDepthTracks() method MUST return a - sequence that represents a snapshot of all the - MediaStreamTrack objects in this stream's track - set whose kind is equal to "depth". - The conversion from the track set to the sequence is user - agent defined and the order does not have to be stable between - calls. -

    -

    - The MediaStream consumer for the depth-only - stream and depth+video stream is the video element [HTML]. -

    -
    Note
    - New consumers may be added in a future version of this - specification. -
    -
    -

    6.2.1 - Implementation considerations -

    This section is non-normative.

    -

    - A video stream track and a depth stream track can be - combined into one depth+video stream. The rendering of the - two tracks is intended to be synchronized. The resolution of the - two tracks is intended to be the same. And the coordination of the two - tracks is intended to be calibrated. These are not hard - requirements, since it might not be possible to synchronize tracks - from sources. -

    -
    -
    -
    -

    6.3 - MediaStreamTrack interface -

    -

    - The kind attribute MUST, on getting, return - the string "depth" if the object represents a depth - stream track. -

    -

    - If a MediaStreamTrack of kind "depth" is - muted or disabled, it MUST render black frames, or a - zero-information-content equivalent. -

    -

    - The string "depth" is the SourceTypeEnum value - for the source that is a local depth camera source. -

    -
    -
    -

    6.4 - MediaDeviceInfo interface -

    -

    - The string "depthinput" is the MediaDeviceKind - value for the depth camera input device. -

    -
    -
    -

    6.5 - Media provider object -

    -

    - A media - provider object can represent a depth-only stream (and - specifically, not a depth+video stream). The user agent - MUST support a media element with - an assigned - media provider object that is a depth-only stream, and in - particular, the srcObject - IDL attribute that allows the media element to be - assigned a media provider - object MUST, on setting and getting, behave as specified in - [HTML]. -

    -
    -
    -

    6.6 - The video element -

    -

    - For a video - element whose assigned - media provider object is a depth-only stream, the user - agent MUST, for each pixel of the media data that is - represented by a depth map, convert the depth map value to - grayscale prior to when the video element is - potentially - playing. -

    -

    - For a video - element whose assigned - media provider object is a depth+video stream, the user - agent MUST act as if all the MediaStreamTracks of kind - "depth" were removed prior to when the - video element is potentially - playing. -

    -

    - The algorithm to convert the depth map value to grayscale, - given a depth map value d, is as follows: -

    -
      -
    1. Let bit depth be the bit depth of the depth map. -
    2. -
    3. Let near be the near value. -
    4. -
    5. Let far be the far value. -
    6. -
    7. If bit depth is greater than 8, then apply the - rules to convert using range inverse to d to obtain - quantized value d8bit. -
    8. -
    9. Otherwise, apply the rules to convert using range linear - to d to obtain quantized value - d8bit. -
    10. -
    11. Return d8bit. -
    12. -
    -

    - The rules to convert using range inverse are as given in - the following formula: -

    Range inverse
    - Quantization -

    - The rules to convert using range linear are as given in - the following formula: -

    Range linear
    - Quantization -
    -

    6.6.1 - VideoTrack interface -

    -

    - For each depth stream track in the depth-only stream, - the user agent MUST create a corresponding VideoTrack - as defined in [HTML]. -

    -
    -
    -
    -

    6.7 - MediaTrackSettings dictionary -

    -

    - When the getSettings() method is invoked on a depth stream - track, the user agent MUST return the following dictionary - that extends the MediaTrackSettings dictionary: -

    -
    enum RangeFormat {
    -    "inverse",
    -    "linear"
    -};
    -
    -partial dictionary MediaTrackSettings {
    -    double        focalLength;
    -    RangeFormat   format;
    -    double        horizontalFieldOfView;
    -    double        verticalFieldOfView;
    -    DepthMapUnit? depthMapUnit;
    -    double        near;
    -    double        far;
    -};
    -
    -

    - The focalLength dictionary member - represents the depth map's focal length. -

    -

    - The format dictionary member represents the - depth to grayscale conversion method applied to the depth - map in the convert the depth map value to grayscale - algorithm. The RangeFormat enumeration represents the - possible values. If the value is "inverse", the rules to convert using range - inverse have been applied, and if the value is "linear", the rules to convert using range - linear have been applied. -

    -

    - The horizontalFieldOfView dictionary member - represents the depth map's horizontal field of view. -

    -

    - The verticalFieldOfView dictionary member - represents the depth map's vertical field of view. -

    -

    - The depthMapUnit dictionary member represents - the active depth map unit. -

    -

    - The near dictionary member represents the - depth map's near value. -

    -

    - The far dictionary member represents the - depth map's far value. -

    -
    -
    -
    -

    6.8 - WebGLRenderingContext interface -

    -
    -

    6.8.1 - Implementation considerations -

    This section is non-normative.

    -

    - A video element whose source is a - MediaStream object containing a depth stream - track may be uploaded to a WebGL texture of format - RGB and type UNSIGNED_BYTE. [WEBGL] -

    -

    - For each pixel of this WebGL texture, the R component represents - the lower 8 bits of the 16-bit depth value, the G component - represents the upper 8 bits of the 16-bit depth value, and the - value in the B component is not defined. -

    -
    -
    -
    -

    6.9 - depthMapUnit constrainable property -

    -

    - The depthMapUnit constrainable property is defined to apply - only to depth stream tracks. -

    - - - - - - - - - - - - - - - -
    - Property name - - Values - - Notes -
    - depthMapUnit - - DOMString - - This property is used for setting the initial active depth - map unit when the getUserMedia() method is invoked, - and is not applicable for subsequent media control. -
    -

    - The applyConstraints() method MUST reject the promise with - OverconstrainedError, when invoked with - depthMapUnit property. -

    -
    enum DepthMapUnit {
    -    "mm",
    -    "m"
    -};
    -

    - The DepthMapUnit enumeration represents the - possible units for a depth map. The "mm" value indicates millimeters, the "m" value indicates meters. -

    -
    partial dictionary MediaTrackConstraints {
    -    DepthMapUnit unit = "mm";
    -};
    -
    -partial dictionary MediaTrackConstraintSet {
    -    ConstrainBoolean unit;
    -};
    -

    - The depthMapUnit of MediaTrackConstraints - is said to be the active depth map unit for the depth - stream track, when getUserMedia() invocation has - succeeded. -

    -
    partial dictionary MediaTrackSupportedConstraints {
    -    boolean unit = true;
    -};
    -
    -partial dictionary MediaTrackCapabilities {
    -    DepthMapUnit unit;
    -};
    -
    -
    -
    -

    7. - Examples -

    This section is non-normative.

    -

    - Playback of depth+video stream -

    -
    Example 1
    navigator.mediaDevices.getUserMedia({
    -  depth: true,
    -  video: true
    -}).then(function (stream) {
    -    // Wire the media stream into a <video> element for playback.
    -    // The RGB video is rendered.
    -    var video = document.querySelector('#video');
    -    video.srcObject = stream;
    -    video.play();
    -
    -    // Construct a depth-only stream out of the existing depth stream track.
    -    var depthOnlyStream = new MediaStream(s.getDepthTracks()[0]);
    -
    -    // Wire the depth-only stream into another <video> element for playback.
    -    // The depth information is rendered in its grayscale representation.
    -    var depthVideo = document.querySelector('#depthVideo');
    -    depthVideo.srcObject = depthOnlyStream;
    -    depthVideo.play();
    -  }
    -);
    -

    - WebGL Fragment Shader based post-processing -

    -
    Example 2
    // This code sets up a video element from a depth stream, uploads it to a WebGL
    -// texture, and samples that texture in the fragment shader, reconstructing the
    -// 16-bit depth values from the red and green channels.
    -navigator.mediaDevices.getUserMedia({
    -  depth: true,
    -}).then(function (stream) {
    -  // wire the stream into a <video> element for playback
    -  var depthVideo = document.querySelector('#depthVideo');
    -  depthVideo.srcObject = stream;
    -  depthVideo.play();
    -}).catch(function (reason) {
    -  // handle gUM error here
    -});
    -
    -// ... later, in the rendering loop ...
    -gl.texImage2D(
    -   gl.TEXTURE_2D,
    -   0,
    -   gl.RGB,
    -   gl.RGB,
    -   gl.UNSIGNED_BYTE,
    -   depthVideo
    -);
    -
    -<script id="fragment-shader" type="x-shader/x-fragment">
    -  varying vec2 v_texCoord;
    -  // u_tex points to the texture unit containing the depth texture.
    -  uniform sampler2D u_tex;
    -  uniform float far;
    -  uniform float near;
    -  uniform bool isRangeInverse;
    -  void main() {
    -    vec4 floatColor = texture2D(u_tex, v_texCoord);
    -    float dn = floatColor.r;
    -    float depth = 0.;
    -    if (isRangeInverse) {
    -      depth = far * near / ( far - dn * ( far - near));
    -    } else {
    -      // Otherwise, using range linear
    -      depth = dn * ( far - near ) + near;
    -    }
    -    // ...
    -  }
    -</script>
    +
    + [CONTENT]
    -
    -

    8. - Privacy and security considerations -

    This section is non-normative.

    -

    - The - privacy and security considerations discussed in [GETUSERMEDIA] - apply to this extension specification. -

    -
    -
    -

    A. - Acknowledgements -

    -

    - Thanks to everyone who contributed to the Use - Cases and Requirements, sent feedback and comments. Special thanks - to Ningxin Hu for experimental implementations, as well as to the - Project Tango for their experiments. -

    -
    - -

    B. References

    B.1 Normative references

    [GETUSERMEDIA]
    Daniel Burnett; Adam Bergkvist; Cullen Jennings; Anant Narayanan. Media Capture and Streams. 14 April 2015. W3C Last Call Working Draft. URL: http://www.w3.org/TR/mediacapture-streams/ -
    [HTML]
    Ian Hickson. HTML Standard. Living Standard. URL: https://html.spec.whatwg.org/multipage/ -
    [RFC2119]
    S. Bradner. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 -
    [WEBIDL]
    Cameron McCormack; Boris Zbarsky. WebIDL Level 1. 4 August 2015. W3C Working Draft. URL: http://www.w3.org/TR/WebIDL-1/ -

    B.2 Informative references

    [MEDIACAPTURE-WORKER]
    Chia-hung Tai; Robert O'Callahan; Tzuhao Kuo; Anssi Kostiainen. Media Capture Stream with Worker. W3C Editor's Draft. URL: https://w3c.github.io/mediacapture-worker/ -
    [WEBGL]
    Chris Marrin (Apple Inc.). WebGL Specification, Version 1.0. 10 February 2011. URL: https://www.khronos.org/registry/webgl/specs/1.0/ -
    + diff --git a/test/docs/metadata/tabular-data-model.html b/test/docs/metadata/tabular-data-model.html index 21d6fb931..54bfb5b57 100644 --- a/test/docs/metadata/tabular-data-model.html +++ b/test/docs/metadata/tabular-data-model.html @@ -695,2355 +695,9 @@

    Table of Contents

    - -
    -

    1. Introduction

    -

    - Tabular data is data that is structured into rows, each of which contains information about some thing. Each row contains the same number of cells (although some of these cells may be empty), which provide values of properties of the thing described by the row. In tabular data, cells within the same column provide values for the same property of the things described by each row. This is what differentiates tabular data from other line-oriented formats. -

    -

    - Tabular data is routinely transferred on the web in a textual format called CSV, but the definition of CSV in practice is very loose. Some people use the term to mean any delimited text file. Others stick more closely to the most standard definition of CSV that there is, [RFC4180]. Appendix A describes the various ways in which CSV is defined. This specification refers to such files, as well as tab-delimited files, fixed field formats, spreadsheets, HTML tables, and SQL dumps as tabular data files. -

    -

    - In section 4. Tabular Data Models, this document defines a model for tabular data that abstracts away from the varying syntaxes that are used when exchanging tabular data. The model includes annotations, or metadata, about collections of individual tables, rows, columns, and cells. These annotations are typically supplied through separate metadata files; section 5. Locating Metadata defines how these metadata files can be located, while [tabular-metadata] defines what they contain. -

    -

    - Once an annotated table has been created, it can be processed in various ways, such as display, validation, or conversion into other formats. This processing is described in section 6. Processing Tables. -

    -

    - This specification does not normatively define a format for exchanging tabular data. However, it does provide some best practice guidelines for publishing tabular data as CSV, in section 7. Best Practice CSV, and for parsing both this syntax and those similar to it, in section 8. Parsing Tabular Data. -

    -
    -

    2. Conformance

    -

    - As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, - and notes in this specification are non-normative. Everything else in this specification is - normative. -

    -

    The key words MAY, MUST, MUST NOT, SHOULD, and SHOULD NOT are - to be interpreted as described in [RFC2119]. -

    - -

    This specification makes use of the compact IRI Syntax; please refer to the Compact IRIs from [JSON-LD].

    - -

    This specification makes use of the following namespaces:

    -
    -
    csvw:
    -
    http://www.w3.org/ns/csvw#
    -
    dc:
    -
    http://purl.org/dc/terms/
    -
    rdf:
    -
    http://www.w3.org/1999/02/22-rdf-syntax-ns#
    -
    rdfs:
    -
    http://www.w3.org/2000/01/rdf-schema#
    -
    schema:
    -
    http://schema.org/
    -
    xsd:
    -
    http://www.w3.org/2001/XMLSchema#
    -
    - -
    -
    -

    3. Typographical conventions

    -

    The following typographic conventions are used in this specification:

    - -
    -
    markup
    -
    Markup (elements, attributes, properties), machine processable values (string, characters, media types), property name, or a file name is in red-orange monospace font.
    -
    variable
    -
    A variable in pseudo-code or in an algorithm description is in italics.
    -
    definition
    -
    A definition of a term, to be used elsewhere in this or other specifications, is in bold and italics.
    -
    definition reference
    -
    A reference to a definition in this document is underlined and is also an active link to the definition itself.
    -
    markup definition reference
    -
    A reference to a definition in this document, when the reference itself is also a markup, is underlined, red-orange monospace font, and is also an active link to the definition itself.
    -
    external definition reference
    -
    A reference to a definition in another document is underlined, in italics, and is also an active link to the definition itself.
    -
    markup external definition reference
    -
    A reference to a definition in another document, when the reference itself is also a markup, is underlined, in italics red-orange monospace font, and is also an active link to the definition itself.
    -
    hyperlink
    -
    A hyperlink is underlined and in blue.
    -
    [reference]
    -
    A document reference (normative or informative) is enclosed in square brackets and links to the references section.
    -
    - -
    Note

    Notes are in light green boxes with a green left border and with a "Note" header in green. Notes are normative or informative depending on whether they are in a normative or informative section, respectively.

    - -
    Example 1
    Examples are in light khaki boxes, with khaki left border, and with a 
    -numbered "Example" header in khaki. Examples are always informative. 
    -The content of the example is in monospace font and may be syntax colored.
    -
    -
    -
    -

    4. Tabular Data Models

    -

    - This section defines an annotated tabular data model: a model for tables that are annotated with metadata. Annotations provide information about the cells, rows, columns, tables, and groups of tables with which they are associated. The values of these annotations may be lists, structured objects, or atomic values. Core annotations are those that affect the behavior of processors defined in this specification, but other annotations may also be present on any of the components of the model. -

    -

    - Annotations may be described directly in [tabular-metadata], be embedded in a tabular data file, or created during the process of generating an annotated table. -

    -

    - String values within the tabular data model (such as column titles or cell string values) MUST contain only Unicode characters. -

    -
    Note

    - In this document, the term annotation refers to any metadata associated with an object in the annotated tabular data model. These are not necessarily web annotations in the sense of [annotation-model]. -

    -
    -

    4.1 Table groups

    -

    - A group of tables comprises a set of annotated tables and a set of annotations that relate to that group of tables. The core annotations of a group of tables are: -

    -
      -
    • id — an identifier for this group of tables, or null if this is undefined.
    • -
    • notes — any number of additional annotations on the group of tables. This annotation may be empty.
    • -
    • tables — the list of tables in the group of tables. A group of tables MUST have one or more tables.
    • -
    -

    - Groups of tables MAY in addition have any number of annotations which provide information about the group of tables. Annotations on a group of tables may include: -

    -
      -
    • titles or descriptions of the group of tables.
    • -
    • information about the source or provenance of the group of tables.
    • -
    • links to other groups of tables (e.g. to those that provide similar data from a different time period).
    • -
    - -

    When originating from [tabular-metadata], these annotations arise from common properties defined on table group descriptions within metadata documents.

    - -
    -
    -

    4.2 Tables

    -

    - An annotated table is a table that is annotated with additional metadata. The core annotations of a table are: -

    -
      -
    • columns — the list of columns in the table. A table MUST have one or more columns and the order of the columns within the list is significant and MUST be preserved by applications.
    • -
    • table direction — the direction in which the columns in the table should be displayed, as described in section 6.5.1 Bidirectional Tables; the value of this annotation may also become the value of the text direction annotation on columns and cells within the table, if the textDirection property is set to inherit (the default).
    • -
    • foreign keys — a list of foreign keys on the table, as defined in [tabular-metadata], which may be an empty list.
    • -
    • id — an identifier for this table, or null if this is undefined.
    • -
    • notes — any number of additional annotations on the table. This annotation may be empty.
    • -
    • rows — the list of rows in the table. A table MUST have one or more rows and the order of the rows within the list is significant and MUST be preserved by applications.
    • -
    • schema — a URL referencing a schema applied to this table, or null.
    • -
    • suppress output — a boolean that indicates whether or not this table should be suppressed in any output generated from converting the group of tables, that this table belongs to, into another format, as described in section 6.7 Converting Tables.
    • -
    • transformations — a (possibly empty) list of specifications for converting this table into other formats, as defined in [tabular-metadata].
    • -
    • url — the URL of the source of the data in the table, or null if this is undefined.
    • -
    -

    - The table MAY in addition have any number of other annotations. Annotations on a table may include: -

    -
      -
    • titles or descriptions of the table,
    • -
    • information about the source or provenance of the data in the table, or
    • -
    • links to other tables (e.g. to indicate tables that include related information).
    • -
    -

    When originating from [tabular-metadata], these annotations arise from common properties defined on table descriptions within metadata documents.

    -
    -
    -

    4.3 Columns

    -

    - A column represents a vertical arrangement of cells within a table. The core annotations of a column are: -

    -
      -
    • about URL — the about URL URI template used to create a URL identifier for each value of cell in this column relative to the row in which it is contained, as defined in [tabular-metadata].
    • -
    • cells — the list of cells in the column. A column MUST contain one cell from each row in the table. The order of the cells in the list MUST match the order of the rows in which they appear within the rows for the associated table.
    • -
    • datatype — the expected datatype for the values of cells in this column, as defined in [tabular-metadata].
    • -
    • default — the default value for cells whose string value is an empty string.
    • -
    • lang — the code for the expected language for the values of cells in this column, expressed in the format defined by [BCP47].
    • -
    • name — the name of the column.
    • -
    • null — the string or strings which cause the value of cells having string value matching any of these values to be null.
    • -
    • number — the position of the column amongst the columns for the associated table, starting from 1.
    • -
    • ordered — a boolean that indicates whether the order of values of a cell should be preserved or not.
    • -
    • property URL — the expected property URL URI template used to create a URL identifier for the property of each value of cell in this column relative to the row in which it is contained, as defined in [tabular-metadata].
    • -
    • required — a boolean that indicates that values of cells in this column MUST NOT be empty.
    • -
    • separator — a string value used to create multiple values of cells in this column by splitting the string value on the separator.
    • -
    • source number — the position of the column in the file at the url of the table, starting from 1, or null.
    • -
    • suppress output — a boolean that indicates whether or not this column should be suppressed in any output generated from converting the table, as described in section 6.7 Converting Tables.
    • -
    • table — the table in which the column appears.
    • -
    • text direction — the indicator of the text direction values of cells in this column, as described in section 6.5.1 Bidirectional Tables; the value of this annotation may be derived from the table direction annotation on the table, if the textDirection property is set to inherit (the default).
    • -
    • titles — any number of human-readable titles for the column, each of which MAY have an associated language code as defined by [BCP47].
    • -
    • value URL — the expected value URL URI template used to create the URL identifier for the value of each cell in this column, as defined in [tabular-metadata].
    • -
    • virtual — a boolean that indicates whether the column is a virtual column. Virtual columns are used to extend the source data with additional empty columns to support more advanced conversions; when this annotation is false, the column is a real column, which exists in the source data for the table.
    • -
    -
    Note

    - Several of these annotations arise from inherited properties that may be defined within metadata on table group, table or individual column descriptions. -

    -

    - Columns MAY in addition have any number of other annotations, such as a description. When originating from [tabular-metadata], these annotations arise from common properties defined on column descriptions within metadata documents.

    -
    -
    -

    4.4 Rows

    -

    - A row represents a horizontal arrangement of cells within a table. The core annotations of a row are: -

    -
      -
    • cells — the list of cells in the row. A row MUST contain one cell from each column in the table. The order of the cells in the list MUST match the order of the columns in which they appear within the table columns for the row's table.
    • -
    • number — the position of the row amongst the rows for the table, starting from 1.
    • -
    • primary key — a possibly empty list of cells whose values together provide a unique identifier for this row. This is similar to the name of a column.
    • -
    • titles — any number of human-readable titles for the row, each of which MAY have an associated language code as defined by [BCP47].
    • -
    • referenced rows — a possibly empty list of pairs of a foreign key and a row in a table within the same group of tables (which may be another row in the table in which this row appears).
    • -
    • source number — the position of the row in the original url of the table, starting from 1, or null.
    • -
    • table — the table in which the row appears.
    • -
    -

    - Rows MAY have any number of additional annotations. The annotations on a row provide additional metadata about the information held in the row, such as: -

    -
      -
    • the certainty of the information in that row.
    • -
    • information about the source or provenance of the data in that row.
    • -
    -

    - Neither this specification nor [tabular-metadata] defines a method to specify such annotations. Implementations MAY define a method for adding annotations to rows by interpreting notes on the table. -

    -
    -
    -

    4.5 Cells

    -

    - A cell represents a cell at the intersection of a row and a column within a table. The core annotations of a cell are: -

    -
      -
    • about URL — an absolute URL for the entity about which this cell provides information, or null.
    • -
    • column — the column in which the cell appears; the cell MUST be in the cells for that column.
    • -
    • errors — a (possibly empty) list of validation errors generated while parsing the value of the cell.
    • -
    • ordered — a boolean that, if the value of this cell is a list, indicates whether the order of that list should be preserved or not.
    • -
    • property URL — an absolute URL for the property associated with this cell, or null.
    • -
    • row — the row in which the cell appears; the cell MUST be in the cells for that row.
    • -
    • string value — a string that is the original syntactic representation of the value of the cell, e.g. how the cell appears within a CSV file; this may be an empty string.
    • -
    • table — the table in which the cell appears.
    • -
    • text direction — which direction the text within the cell should be displayed, as described in section 6.5.1 Bidirectional Tables; the value of this annotation may be derived from the table direction annotation on the table, if the textDirection property is set to inherit (the default).
    • -
    • value — the semantic value of the cell; this MAY be a list of values, each of which MAY have a datatype other than a string, MAY have a language and MAY be null. For example, annotations might enable a processor to understand the string value of the cell as representing a number or a date. By default, if the string value is an empty string, the semantic value of the cell is null.
    • -
    • value URL — an absolute URL for this cell's value, or null.
    • -
    -
    Note

    - The presence or absence of quotes around a value within a CSV file is a syntactic detail that is not reflected in the tabular data model. In other words, there is no distinction in the model between the second value in a,,z and the second value in a,"",z. -

    -
    Note

    - Several of these annotations arise from or are constructed based on inherited properties that may be defined within metadata on table group, table or column descriptions. -

    -

    - Cells MAY have any number of additional annotations. The annotations on a cell provide metadata about the value held in the cell, particularly when this overrides the information provided for the column and row that the cell falls within. Annotations on a cell might be: -

    -
      -
    • notes to aid the interpretation of the value.
    • -
    • information about the source or provenance of the data in that cell.
    • -
    • indication of the units of measure used within a cell.
    • -
    -

    - Neither this specification nor [tabular-metadata] defines a method to specify such annotations. Implementations MAY define a method for adding annotations to cells by interpreting notes on the table. -

    -
    Note

    - Units of measure are not a built-in part of the tabular data model. However, they can be captured through notes or included in the converted output of tabular data through defining datatypes with identifiers that indicate the unit of measure, using virtual columns to create nested data structures, or using common properties to specify Data Cube attributes as defined in [vocab-data-cube]. -

    -
    -
    -

    4.6 Datatypes

    -

    - Columns and cell values within tables may be annotated with a datatype which indicates the type of the values obtained by parsing the string value of the cell. -

    -

    - Datatypes are based on a subset of those defined in [xmlschema11-2]. The annotated tabular data model limits cell values to have datatypes as shown on the diagram: -

    -
      -
    • the datatypes defined in [xmlschema11-2] as derived from and including xsd:anyAtomicType.
    • -
    • the datatype rdf:XMLLiteral, a sub-type of xsd:string, which indicates the value is an XML fragment.
    • -
    • the datatype rdf:HTML, a sub-type of xsd:string, which indicates the value is an HTML fragment.
    • -
    • the datatype csvw:JSON, a sub-type of xsd:string, which indicates the value is serialized JSON.
    • -
    • datatypes derived from any of these datatypes.
    • -
    -
    - Built-in Datatype Hierarchy diagram -
    Fig. 1 Diagram showing the built-in datatypes, based on [xmlschema11-2]; names in parentheses denote aliases to the [xmlschema11-2] terms (see the diagram in SVG or PNG formats)
    -
    -

    The core annotations of a datatype are:

    - -

    - If the id of a datatype is that of a built-in datatype, the values of the other core annotations listed above MUST be consistent with the values defined in [xmlschema11-2] or above. For example, if the id is xsd:integer then the base must be xsd:decimal. -

    -

    - Datatypes MAY have any number of additional annotations. The annotations on a datatype provide metadata about the datatype such as title or description. These arise from common properties defined on datatype descriptions within metadata documents, as defined in [tabular-metadata]. -

    -
    Note

    - The id annotation may reference an XSD, OWL or other datatype definition, which is not used by this specification for validating column values, but may be useful for further processing. -

    -
    -

    4.6.1 Length Constraints

    -

    - The length, minimum length and maximum length annotations indicate the exact, minimum and maximum lengths for cell values. -

    -

    - The length of a value is determined as defined in [xmlschema11-2], namely as follows: -

    -
      -
    • if the value is null, its length is zero.
    • -
    • if the value is a string or one of its subtypes, its length is the number of characters (i.e. [UNICODE] code points) in the value.
    • -
    • if the value is of a binary type, its length is the number of bytes in the binary value.
    • -
    -

    If the value is a list, the constraint applies to each element of the list.

    -
    -
    -

    4.6.2 Value Constraints

    -

    - The minimum, maximum, minimum exclusive, and maximum exclusive annotations indicate limits on cell values. These apply to numeric, date/time, and duration types. -

    -

    - Validation of cell values against these datatypes is as defined in [xmlschema11-2]. If the value is a list, the constraint applies to each element of the list. -

    -
    -
    -
    -
    -

    5. Locating Metadata

    -

    - As described in section 4. Tabular Data Models, tabular data may have a number of annotations associated with it. Here we describe the different methods that can be used to locate metadata that provides those annotations. -

    -

    - In the methods of locating metadata described here, metadata is provided within a single document. The syntax of such documents is defined in [tabular-metadata]. Metadata is located using a specific order of precedence: -

    -
      -
    1. metadata supplied by the user of the implementation that is processing the tabular data, see section 5.1 Overriding Metadata.
    2. -
    3. metadata in a document linked to using a Link header associated with the tabular data file, see section 5.2 Link Header.
    4. -
    5. metadata located through default paths which may be overridden by a site-wide location configuration, see section 5.3 Default Locations and Site-wide Location Configuration.
    6. -
    7. metadata embedded within the tabular data file itself, see section 5.4 Embedded Metadata.
    8. -
    -

    - Processors MUST use the first metadata found for processing a tabular data file by using overriding metadata, if provided. Otherwise processors MUST attempt to locate the first metadata document from the Link header or the metadata located through site-wide configuration. If no metadata is supplied or found, processors MUST use embedded metadata. If the metadata does not originate from the embedded metadata, validators MUST verify that the table group description within that metadata is compatible with that in the embedded metadata, as defined in [tabular-metadata]. -

    -
    Note

    - When feasible, processors should start from a metadata file and publishers should link to metadata files directly, rather than depend on mechanisms outlined in this section for locating metadata from a tabular data file. Otherwise, if possible, publishers should provide a Link header on the tabular data file as described in section 5.2 Link Header. -

    -
    Note

    - If there is no site-wide location configuration, section 5.3 Default Locations and Site-wide Location Configuration specifies default URI patterns or paths to be used to locate metadata.

    -

    -
    -

    5.1 Overriding Metadata

    -

    - Processors SHOULD provide users with the facility to provide their own metadata for tabular data files that they process. This might be provided: -

    -
      -
    • through processor options, such as command-line options for a command-line implementation or checkboxes in a GUI.
    • -
    • by enabling the user to select an existing metadata file, which may be local or remote.
    • -
    • by enabling the user to specify a series of metadata files, which are merged by the processor and handled as if they were a single file.
    • -
    -

    - For example, a processor might be invoked with: -

    -
    Example 2: Command-line CSV processing with column types
    $ csvlint data.csv --datatypes:string,float,string,string
    -

    - to enable the testing of the types of values in the columns of a CSV file, or with: -

    -
    Example 3: Command-line CSV processing with a schema
    $ csvlint data.csv --schema:schema.json
    -

    - to supply a schema that describes the contents of the file, against which it can be validated. -

    -

    - Metadata supplied in this way is called overriding, or user-supplied, metadata. Implementations SHOULD define how any options they define are mapped into the vocabulary defined in [tabular-metadata]. If the user selects existing metadata files, implementations MUST NOT use metadata located through the Link header (as described in section 5.2 Link Header) or site-wide location configuration (as described in section 5.3 Default Locations and Site-wide Location Configuration). -

    -
    Note

    - Users should ensure that any metadata from those locations that they wish to use is explicitly incorporated into the overriding metadata that they use to process tabular data. Processors may provide facilities to make this easier by automatically merging metadata files from different locations, but this specification does not define how such merging is carried out. -

    -
    - -
    -

    5.3 Default Locations and Site-wide Location Configuration

    -

    - If the user has not supplied a metadata file as overriding metadata, described in section 5.1 Overriding Metadata, and no applicable metadata file has been discovered through a Link header, described in section 5.2 Link Header, processors MUST attempt to locate a metadata document through site-wide configuration. -

    -

    - In this case, processors MUST retrieve the file from the well-known URI /.well-known/csvm. (Well-known URIs are defined by [RFC5785].) If no such file is located (i.e. the response results in a client error 4xx status code or a server error 5xx status code), processors MUST proceed as if this file were found with the following content which defines default locations: -

    -
    {+url}-metadata.json
    -csv-metadata.json
    -        
    -

    - The response to retrieving /.well-known/csvm MAY be cached, subject to cache control directives. This includes caching an unsuccessful response such as a 404 Not Found. -

    -

    - This file MUST contain a URI template, as defined by [URI-TEMPLATE], on each line. Starting with the first such URI template, processors MUST: -

    -
      -
    1. Expand the URI template, with the variable url being set to the URL of the requested tabular data file (with any fragment component of that URL removed).
    2. -
    3. Resolve the resulting URL against the URL of the requested tabular data file.
    4. -
    5. Attempt to retrieve a metadata document at that URL.
    6. -
    7. If no metadata document is found at that location, or if the metadata file found at the location does not explicitly include a reference to the relevant tabular data file, perform these same steps on the next URI template, otherwise use that metadata document.
    8. -
    -

    - For example, if the tabular data file is at http://example.org/south-west/devon.csv then processors must attempt to locate a well-known file at http://example.org/.well-known/csvm. If that file contains: -

    -
    Example 5
    {+url}.json
    -csvm.json
    -/csvm?file={url}
    -

    - the processor will first look for http://example.org/south-west/devon.csv.json. If there is no metadata file in that location, it will then look for http://example.org/south-west/csvm.json. Finally, if that also fails, it will look for http://example.org/csvm?file=http://example.org/south-west/devon.csv. -

    -

    - If no file were found at http://example.org/.well-known/csvm, the processor will use the default locations and try to retrieve metadata from http://example.org/south-west/devon.csv-metadata.json and, if unsuccessful, http://example.org/south-west/csv-metadata.json. -

    -
    -
    -

    5.4 Embedded Metadata

    -

    - Most syntaxes for tabular data provide a facility for embedding metadata within the tabular data file itself. The definition of a syntax for tabular data SHOULD include a description of how the syntax maps to an annotated data model, and in particular how any embedded metadata is mapped into the vocabulary defined in [tabular-metadata]. Parsing based on the default dialect for CSV, as described in 8. Parsing Tabular Data, will extract column titles from the first row of a CSV file. -

    -
    Example 6: http://example.org/tree-ops.csv
    GID,On Street,Species,Trim Cycle,Inventory Date
    -1,ADDISON AV,Celtis australis,Large Tree Routine Prune,10/18/2010
    -2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010
    -

    The results of this can be found in section 8.2.1 Simple Example.

    -

    For another example, the following tab-delimited file contains embedded metadata where it is assumed that comments may be added using a #, and that the column types may be indicated using a #datatype annotation: -

    -
    Example 7: Tab-separated file containing embedded metadata
    # publisher City of Palo Alto
    -# updated 12/31/2010
    -#name GID on_street species trim_cycle  inventory_date
    -#datatype string  string  string  string  date:M/D/YYYY
    -  GID On Street Species Trim Cycle  Inventory Date
    -  1 ADDISON AV  Celtis australis  Large Tree Routine Prune  10/18/2010
    -  2 EMERSON ST  Liquidambar styraciflua Large Tree Routine Prune  6/2/2010
    -

    - A processor that recognises this format may be able to extract and make sense of this embedded metadata. -

    -
    +
    + [CONTENT]
    -
    -

    6. Processing Tables

    -

    - This section describes how particular types of applications should process tabular data and metadata files. -

    -

    - In many cases, an application will start processing from a metadata file. In that case, the initial metadata file is treated as overriding metadata and the application MUST NOT continue to retrieve other available metadata about each of the tabular data files referenced by that initial metadata file other than embedded metadata. -

    -

    - In other cases, applications will start from a tabular data file, such as a CSV file, and locate metadata from that file. This metadata will be used to process the file as if the processor were starting from that metadata file. -

    -

    - For example, if a validator is passed a locally authored metadata file spending.json, which contains: -

    -
    Example 8: Metadata file referencing multiple tabular data files sharing a schema
    {
    -  "tableSchema": "government-spending.csv",
    -  "tables": [{
    -    "url": "http://example.org/east-sussex-2015-03.csv",
    -  }, {
    -    "url": "http://example.org/east-sussex-2015-02.csv"
    -  }, ...
    -  ]
    -}
    -

    - the validator would validate all the listed tables, using the locally defined schema at government-spending.csv. It would also use the metadata embedded in the referenced CSV files; for example, when processing http://example.org/east-sussex-2015-03.csv, it would use embedded metadata within that file to verify that the CSV is compatible with the metadata. -

    -

    - If a validator is passed a tabular data file http://example.org/east-sussex-2015-03.csv, the validator would use the metadata located from the CSV file: the first metadata file found through the Link headers found when retrieving that file, or located through a site-wide location configuration. -

    -
    Note

    Starting with a metadata file can remove the need to perform additional requests to locate linked metadata, or metadata retrieved through site-wide location configuration.

    -
    -

    6.1 Creating Annotated Tables

    -

    After locating metadata, metadata is normalized and coerced into a single table group description. When starting with a metadata file, this involves normalizing the provided metadata file and verifying that the embedded metadata for each tabular data file referenced from the metadata is compatible with the metadata. When starting with a tabular data file, this involves locating the first metadata file as described in section 5. Locating Metadata and normalizing into a single descriptor. -

    -

    If processing starts with a tabular data file, implementations:

    -
      -
    1. Retrieve the tabular data file.
    2. -
    3. Retrieve the first metadata file (FM) as described in section 5. Locating Metadata: -
        -
      1. metadata supplied by the user (see section 5.1 Overriding Metadata).
      2. -
      3. metadata referenced from a Link Header that may be returned when retrieving the tabular data file (see section 5.2 Link Header).
      4. -
      5. metadata retrieved through a site-wide location configuration (see section 5.3 Default Locations and Site-wide Location Configuration).
      6. -
      7. embedded metadata as defined in section 5.4 Embedded Metadata with a single tables entry where the url property is set from that of the tabular data file.
      8. -
      -
    4. -
    5. Proceed as if the process starts with FM.
    6. -
    -

    If the process starts with a metadata file:

    -
      -
    1. Retrieve the metadata file yielding the metadata UM (which is treated as overriding metadata, see section 5.1 Overriding Metadata).
    2. -
    3. Normalize UM using the process defined in Normalization in [tabular-metadata], coercing UM into a table group description, if necessary.
    4. -
    5. For each table (TM) in UM in order, create one or more annotated tables: -
        -
      1. Extract the dialect description (DD) from UM for the table associated with the tabular data file. If there is no such dialect description, extract the first available dialect description from a group of tables in which the tabular data file is described. Otherwise use the default dialect description.
      2. -
      3. If using the default dialect description, override default values in DD based on HTTP headers found when retrieving the tabular data file: -
          -
        • If the media type from the Content-Type header is text/tab-separated-values, set delimiter to TAB in DD.
        • -
        • If the Content-Type header includes the header parameter with a value of absent, set header to false in DD.
        • -
        • If the Content-Type header includes the charset parameter, set encoding to this value in DD.
        • -
        -
      4. -
      5. -

        Parse the tabular data file, using DD as a guide, to create a basic tabular data model (T) and extract embedded metadata (EM), for example from the header line.

        -
        Note

        This specification provides a non-normative definition for parsing CSV-based files, including the extraction of embedded metadata, in section 8. Parsing Tabular Data. This specification does not define any syntax for embedded metadata beyond this; whatever syntax is used, it's assumed that metadata can be mapped to the vocabulary defined in [tabular-metadata].

        -
      6. -
      7. If a Content-Language HTTP header was found when retrieving the tabular data file, and the value provides a single language, set the lang inherited property to this value in TM, unless TM already has a lang inherited property.
      8. -
      9. Verify that TM is compatible with EM using the procedure defined in Table Description Compatibility in [tabular-metadata]; if TM is not compatible with EM validators MUST raise an error, other processors MUST generate a warning and continue processing.
      10. -
      11. Use the metadata TM to add annotations to the tabular data model T as described in Section 2 Annotating Tables in [tabular-metadata].
      12. -
      -
    6. -
    -
    -
    -

    6.2 Metadata Compatibility

    -

    When processing a tabular data file using metadata as discovered using section 5. Locating Metadata, processors MUST ensure that the metadata and tabular data file are compatible; this is typically done by extracting embedded metadata from the tabular data file and determining that the provided or discovered metadata is compatible with the embedded metadata using the procedure defined in Table Compatibility in [tabular-metadata].

    -
    -
    -

    6.3 URL Normalization

    -

    Metadata Discovery and Compatibility involve comparing URLs. When comparing URLs, processors MUST use Syntax-Based Normalization as defined in [RFC3986]. Processors MUST perform Scheme-Based Normalization for HTTP (80) and HTTPS (443) and SHOULD perform Scheme-Based Normalization for other well-known schemes.

    -
    -
    -

    6.4 Parsing Cells

    -

    - Unlike many other data formats, tabular data is designed to be read by humans. For that reason, it's common for data to be represented within tabular data in a human-readable way. The - datatype, - default, - lang, - null, - required, and - separator annotations provide the information needed to parse the string value of a cell into its (semantic) value annotation. This is used: -

    -
      -
    • by validators to check that the data in the table is in the expected format,
    • -
    • by converters to parse the values before mapping them into values in the target of the conversion,
    • -
    • when displaying data, to map it into formats that are meaningful for those viewing the data (as opposed to those publishing it), and
    • -
    • when inputting data, to turn entered values into representations in a consistent format.
    • -
    -

    The process of parsing a cell creates a cell with annotations based on the original string value, parsed value and other column annotations and adds the cell to the list of cells in a row and cells in a column:

    - -

    - After parsing, the cell value can be: -

    -
      -
    • null,
    • -
    • a single value with an associated optional datatype or language, or
    • -
    • a sequence of such values.
    • -
    -

    - The process of parsing the string value into a single value or a list of values is as follows: -

    -
      -
    1. unless the datatype base is string, json, xml, html or anyAtomicType, replace all carriage return (#xD), line feed (#xA), and tab (#x9) characters with space characters.
    2. -
    3. unless the datatype base is string, json, xml, html, anyAtomicType, or normalizedString, strip leading and trailing whitespace from the string value and replace all instances of two or more whitespace characters with a single space character.
    4. -
    5. if the normalized string is an empty string, apply the remaining steps to the string given by the column default annotation.
    6. -
    7. if the column separator annotation is not null and the normalized string is an empty string, the cell value is an empty list. If the column required annotation is true, add an error to the list of errors for the cell.
    8. -
    9. if the column separator annotation is not null, the cell value is a list of values; set the list annotation on the cell to true, and create the cell value by: -
        -
      1. if the normalized string is the same as any one of the values of the column null annotation, then the resulting value is null.
      2. -
      3. split the normalized string at the character specified by the column separator annotation.
      4. -
      5. unless the datatype base is string or anyAtomicType, strip leading and trailing whitespace from these strings.
      6. -
      7. applying the remaining steps to each of the strings in turn.
      8. -
      -
    10. -
    11. if the string is an empty string, apply the remaining steps to the string given by the column default annotation.
    12. -
    13. if the string is the same as any one of the values of the column null annotation, then the resulting value is null. If the column separator annotation is null and the column required annotation is true, add an error to the list of errors for the cell.
    14. -
    15. parse the string using the datatype format if one is specified, as described below to give a value with an associated datatype. If the datatype base is string, or there is no datatype, the value has an associated language from the column lang annotation. If there are any errors, add them to the list of errors for the cell; in this case the value has a datatype of string; if the datatype base is string, or there is no datatype, the value has an associated language from the column lang annotation.
    16. -
    17. validate the value based on the length constraints described in section 4.6.1 Length Constraints, the value constraints described in section 4.6.2 Value Constraints and the datatype format annotation if one is specified, as described below. If there are any errors, add them to the list of errors for the cell.
    18. -
    -

    The final value (or values) become the value annotation on the cell.

    -

    If there is an about URL annotation on the column, it becomes the about URL annotation on the cell, after being transformed into an absolute URL as described in URI Template Properties of [tabular-metadata].

    -

    If there is a property URL annotation on the column, it becomes the property URL annotation on the cell, after being transformed into an absolute URL as described in URI Template Properties of [tabular-metadata].

    -

    If there is a value URL annotation on the column, it becomes the value URL annotation on the cell, after being transformed into an absolute URL as described in URI Template Properties of [tabular-metadata]. The value URL annotation is null if the cell value is null and the column virtual annotation is false.

    -
    -

    6.4.1 Parsing examples

    This section is non-normative.

    -

    - When no datatype annotation is available, the value of a cell is the same as its string value. For example, a cell with a string value of "99" would similarly have the (semantic) value "99". -

    -

    - If a datatype base is provided for the cell, that is used to create a (semantic) value for the cell. For example, if the metadata contains: -

    -
    Example 9
    "datatype": "integer"
    -

    - for the cell with the string value "99" then the value of that cell will be the integer 99. A cell whose string value was not a valid integer (such as "one" or "1.0") would be assigned that string value as its (semantic) value annotation, but also have a validation error listed in its errors annotation. -

    -

    - Sometimes data uses special codes to indicate unknown or null values. For example, a particular column might contain a number that is expected to be between 1 and 10, with the string 99 used in the original tabular data file to indicate a null value. The metadata for such a column would include: -

    -
    Example 10
    "datatype": {
    -  "base": "integer",
    -  "minimum": 1,
    -  "maximum": 10
    -},
    -"null": "99"
    -

    - In this case, a cell with a string value of "5" would have the (semantic) value of the integer 5; a cell with a string value of "99" would have the value null. -

    -

    - Similarly, a cell may be assigned a default value if the string value for the cell is empty. A configuration such as: -

    -
    Example 11
    "datatype": {
    -  "base": "integer",
    -  "minimum": 1,
    -  "maximum": 10
    -},
    -"default": "5"
    -

    - In this case, a cell whose string value is "" would be assigned the value of the integer 5. A cell whose string value contains whitespace, such as a single tab character, would also be assigned the value of the integer 5: when the datatype is something other than string or anyAtomicType, leading and trailing whitespace is stripped from string values before the remainder of the processing is carried out. -

    -

    - Cells can contain sequences of values. For example, a cell might have the string value "1 5 7.0". In this case, the separator is a space character. The appropriate configuration would be: -

    -
    Example 12
    "datatype": {
    -  "base": "integer",
    -  "minimum": 1,
    -  "maximum": 10
    -},
    -"default": "5",
    -"separator": " "
    -

    - and this would mean that the cell's value would be an array containing two integers and a string: [1, 5, "7.0"]. The final value of the array is a string because it is not a valid integer; the cell's errors annotation will also contain a validation error. -

    -

    - Also, with this configuration, if the string value of the cell were "" (i.e. it was an empty cell) the value of the cell would be an empty list. -

    -

    - A cell value can be inserted into a URL created using a URI template property such as valueUrl. For example, if a cell with the string value "1 5 7.0" were in a column named values, defined with: -

    -
    Example 13
    "datatype": "decimal",
    -"separator": " ",
    -"valueUrl": "{?values}"
    -

    - then after expansion of the URI template, the resulting valueUrl would be ?values=1.0,5.0,7.0. The canonical representations of the decimal values are used within the URL. -

    -
    -
    -

    6.4.2 Formats for numeric types

    -

    - By default, numeric values must be in the formats defined in [xmlschema11-2]. It is not uncommon for numbers within tabular data to be formatted for human consumption, which may involve using commas for decimal points, grouping digits in the number using commas, or adding percent signs to the number. -

    -

    - If the datatype base is a numeric type, the datatype format annotation indicates the expected format for that number. Its value MUST be either a single string or an object with one or more of the properties: -

    -
    -
    decimalChar
    -
    A string whose value is used to represent a decimal point within the number. The default value is ".". If the supplied value is not a string, implementations MUST issue a warning and proceed as if the property had not been specified.
    -
    groupChar
    -
    A string whose value is used to group digits within the number. The default value is null. If the supplied value is not a string, implementations MUST issue a warning and proceed as if the property had not been specified.
    -
    pattern
    -
    A number format pattern as defined in [UAX35]. Implementations MUST recognise number format patterns containing the symbols 0, #, the specified decimalChar (or "." if unspecified), the specified groupChar (or "," if unspecified), E, +, % and ‰. Implementations MAY additionally recognise number format patterns containing other special pattern characters defined in [UAX35]. If the supplied value is not a string, or if it contains an invalid number format pattern or uses special pattern characters that the implementation does not recognise, implementations MUST issue a warning and proceed as if the property had not been specified.
    -
    -

    - If the datatype format annotation is a single string, this is interpreted in the same way as if it were an object with a pattern property whose value is that string. -

    -

    - If the groupChar is specified, but no pattern is supplied, when parsing the string value of a cell against this format specification, implementations MUST recognise and parse numbers that consist of: -

    -
      -
    1. an optional + or - sign,
    2. -
    3. followed by a decimal digit (0-9),
    4. -
    5. followed by any number of decimal digits (0-9) and the string specified as the groupChar,
    6. -
    7. followed by an optional decimalChar followed by one or more decimal digits (0-9),
    8. -
    9. followed by an optional exponent, consisting of an E followed by an optional + or - sign followed by one or more decimal digits (0-9), or
    10. -
    11. followed by an optional percent (%) or per-mille (‰) sign.
    12. -
    -

    - or that are one of the special values: -

    -
      -
    1. NaN,
    2. -
    3. INF, or
    4. -
    5. -INF.
    6. -
    -

    - Implementations MAY also recognise numeric values that are in any of the standard-decimal, standard-percent or standard-scientific formats listed in the Unicode Common Locale Data Repository. -

    -

    - Implementations MUST add a validation error to the errors annotation for the cell, and set the cell value to a string rather than a number if the string being parsed: -

    -
      -
    • is not in the format specified in the pattern, if one is defined
    • -
    • otherwise, if the string -
        -
      • does not meet the numeric format defined above,
      • -
      • contains two consecutive groupChar strings,
      • -
      -
    • -
    • contains the decimalChar, if the datatype base is integer or one of its sub-types,
    • -
    • contains an exponent, if the datatype base is decimal or one of its sub-types, or
    • -
    • is one of the special values NaN, INF, or -INF, if the datatype base is decimal or one of its sub-types.
    • -
    -

    - Implementations MUST use the sign, exponent, percent, and per-mille signs when parsing the string value of a cell to provide the value of the cell. For example, the string value "-25%" must be interpreted as -0.25 and the string value "1E6" as 1000000. -

    -
    -
    -

    6.4.3 Formats for booleans

    -

    - Boolean values may be represented in many ways aside from the standard 1 and 0 or true and false. -

    -

    - If the datatype base for a cell is boolean, the datatype format annotation provides the true value followed by the false value, separated by |. For example if format is Y|N then cells must hold either Y or N with Y meaning true and N meaning false. If the format does not follow this syntax, implementations MUST issue a warning and proceed as if no format had been provided. -

    -

    - The resulting cell value will be one or more boolean true or false values. -

    -
    -
    -

    6.4.4 Formats for dates and times

    -

    - By default, dates and times are assumed to be in the format defined in [xmlschema11-2]. However dates and times are commonly represented in tabular data in other formats. -

    -

    - If the datatype base is a date or time type, the datatype format annotation indicates the expected format for that date or time. -

    -

    - The supported date and time format patterns listed here are expressed in terms of the date field symbols defined in [UAX35]. These formats MUST be recognised by implementations and MUST be interpreted as defined in that specification. Implementations MAY additionally recognise other date format patterns. Implementations MUST issue a warning if the date format pattern is invalid or not recognised and proceed as if no date format pattern had been provided. -

    -
    Note

    - For interoperability, authors of metadata documents SHOULD use only the formats listed in this section. -

    -

    - The following date format patterns MUST be recognized by implementations: -

    -
      -
    • yyyy-MM-dd e.g., 2015-03-22
    • -
    • yyyyMMdd e.g., 20150322
    • -
    • dd-MM-yyyy e.g., 22-03-2015
    • -
    • d-M-yyyy e.g., 22-3-2015
    • -
    • MM-dd-yyyy e.g., 03-22-2015
    • -
    • M-d-yyyy e.g., 3-22-2015
    • -
    • dd/MM/yyyy e.g., 22/03/2015
    • -
    • d/M/yyyy e.g., 22/3/2015
    • -
    • MM/dd/yyyy e.g., 03/22/2015
    • -
    • M/d/yyyy e.g., 3/22/2015
    • -
    • dd.MM.yyyy e.g., 22.03.2015
    • -
    • d.M.yyyy e.g., 22.3.2015
    • -
    • MM.dd.yyyy e.g., 03.22.2015
    • -
    • M.d.yyyy e.g., 3.22.2015
    • -
    -

    - The following time format patterns MUST be recognized by implementations: -

    -
      -
    • HH:mm:ss.S with one or more trailing S characters indicating the maximum number of fractional seconds e.g., HH:mm:ss.SSS for 15:02:37.143
    • -
    • HH:mm:ss e.g., 15:02:37
    • -
    • HHmmss e.g., 150237
    • -
    • HH:mm e.g., 15:02
    • -
    • HHmm e.g., 1502
    • -
    -

    - The following date/time format patterns MUST be recognized by implementations: -

    -
      -
    • yyyy-MM-ddTHH:mm:ss.S with one or more trailing S characters indicating the maximum number of fractional seconds e.g., yyyy-MM-ddTHH:mm:ss.SSS for 2015-03-15T15:02:37.143
    • -
    • yyyy-MM-ddTHH:mm:ss e.g., 2015-03-15T15:02:37
    • -
    • yyyy-MM-ddTHH:mm e.g., 2015-03-15T15:02
    • -
    • any of the date formats above, followed by a single space, followed by any of the time formats above, e.g., M/d/yyyy HH:mm for 3/22/2015 15:02 or dd.MM.yyyy HH:mm:ss for 22.03.2015 15:02:37
    • -
    -

    - Implementations MUST also recognise date, time, and date/time format patterns that end with timezone markers consisting of between one and three x or X characters, possibly after a single space. These MUST be interpreted as follows: -

    -
      -
    • X e.g., -08, +0530, or Z (minutes are optional)
    • -
    • XX e.g., -0800, +0530, or Z
    • -
    • XXX e.g., -08:00, +05:30, or Z
    • -
    • x e.g., -08 or +0530 (Z is not permitted)
    • -
    • xx e.g., -0800 or +0530 (Z is not permitted)
    • -
    • xxx e.g., -08:00 or +05:30 (Z is not permitted)
    • -
    -

    - For example, date format patterns could include yyyy-MM-ddTHH:mm:ssXXX for 2015-03-15T15:02:37Z or 2015-03-15T15:02:37-05:00, or HH:mm x for 15:02 -05. -

    -

    - The cell value will be one or more date/time values extracted using the format. -

    -
    Note

    - For simplicity, this version of this standard does not support abbreviated or full month or day names, or double digit years. Future versions of this standard may support other date and time formats, or general purpose date/time pattern strings. Authors of schemas SHOULD use appropriate regular expressions, along with the string datatype, for dates and times that use a format other than that specified here. -

    -
    -
    -

    6.4.5 Formats for durations

    -

    - Durations MUST be formatted and interpreted as defined in [xmlschema11-2], using the [ISO8601] format -?PnYnMnDTnHnMnS. For example, the duration P1Y1D is used for a year and a day; the duration PT2H30M for 2 hours and 30 minutes. -

    -

    - If the datatype base is a duration type, the datatype format annotation provides a regular expression for the string values, with syntax and processing defined by [ECMASCRIPT]. If the supplied value is not a valid regular expression, implementations MUST issue a warning and proceed as if no format had been provided. -

    -
    Note

    - Authors are encouraged to be conservative in the regular expressions that they use, sticking to the basic features of regular expressions that are likely to be supported across implementations. -

    -

    - The cell value will be one or more durations extracted using the format. -

    -
    -
    -

    6.4.6 Formats for other types

    -

    - If the datatype base is not numeric, boolean, a date/time type, or a duration type, the datatype format annotation provides a regular expression for the string values, with syntax and processing defined by [ECMASCRIPT]. If the supplied value is not a valid regular expression, implementations MUST issue a warning and proceed as if no format had been provided. -

    -
    Note

    - Authors are encouraged to be conservative in the regular expressions that they use, sticking to the basic features of regular expressions that are likely to be supported across implementations. -

    -

    - Values that are labelled as html, xml, or json SHOULD NOT be validated against those formats. -

    -
    Note

    - Metadata creators who wish to check the syntax of HTML, XML, or JSON within tabular data should use the datatype format annotation to specify a regular expression against which such values will be tested. -

    -
    -
    -
    -

    6.5 Presenting Tables

    This section is non-normative.

    -

    - When presenting tables, implementations should: -

    -
      -
    • use the table direction annotation on each table, and the text direction annotation on each cell, to determine the ordering of columns and characters within cells, as described in section 6.5.1 Bidirectional Tables
    • -
    • use the titles annotation on each column to provide a header for the column, selecting the first title in a language based on the user's locale and preferences, as described in section 6.5.2 Column and row labelling
    • -
    • add links to headers based on the property URLs of the cells in the first row of the table
    • -
    • present cell values, particularly boolean, numeric and date/time values, in a lexical form based on the user's locale and preferences
    • -
    • add links to the presentation of rows and cells based on the about URL and value URL annotations on cells
    • -
    • highlight or otherwise indicate cells with errors
    • -
    • provide a way of viewing non-core annotations on table groups, tables, columns, rows and cells
    • -
    • provide links to download the raw tabular data file that is being displayed
    • -
    -
    -

    6.5.1 Bidirectional Tables

    -

    - There are two levels of bidirectionality to consider when displaying tables: the directionality of the table (i.e., whether the columns should be arranged left-to-right or right-to-left) and the directionality of the content of individual cells. -

    -

    - The table direction annotation on the table provides information about the desired display of the columns in the table. If table direction is ltr then the first column should be displayed on the left and the last column on the right. If table direction is rtl then the first column should be displayed on the right and the last column on the left. -

    -

    - If table direction is auto then tables should be displayed with attention to the bidirectionality of the content of the cells in the table. Specifically, the values of the cells in the table should be scanned breadth first: from the first cell in the first column through to the last cell in the first row, down to the last cell in the last column. If the first character in the table with a strong type as defined in [BIDI] indicates a RTL directionality, the table should be displayed with the first column on the right and the last column on the left. Otherwise, the table should be displayed with the first column on the left and the last column on the right. Characters such as whitespace, quotes, commas, and numbers do not have a strong type, and therefore are skipped when identifying the character that determines the directionality of the table. -

    -

    - Implementations should enable user preferences to override the indicated metadata about the directionality of the table. -

    -

    - Once the directionality of the table has been determined, each cell within the table should be considered as a separate paragraph, as defined by the Unicode Bidirectional Algorithm (UBA) in [BIDI]. The directionality for the cell is determined by looking at the text direction annotation for the cell, as follows: -

    -
      -
    1. If the text direction is ltr then the base direction for the cell content should be set to left-to-right.
    2. -
    3. If the text direction is rtl then the base direction for the cell content should be set to right-to-left.
    4. -
    5. If the text direction is auto then the base direction for the cell content should be set to the direction determined by the first character in the cell with a strong type as defined in [BIDI].
    6. -
    -
    Note

    - If the textDirection property in metadata has the value "inherit", the text direction annotation for a cell inherits its value from the table direction annotation on the table. -

    -

    - When the titles of a column are displayed, these should be displayed in the direction determined by the first character in the title with a strong type as defined in [BIDI]. Titles for the same column in different languages may be displayed in different directions. -

    -
    -
    -

    6.5.2 Column and row labelling

    -

    - The labelling of columns and rows helps those who are attempting to understand the content of a table to grasp what a particular cell means. Implementations should present appropriate titles for columns, and ensure that the most important information in a row is kept apparent to the user, to aid their understanding. For example: -

    -
      -
    • a table presented on the screen might retain certain columns in view so that readers can easily glance at the identifying information in each row
    • -
    • as the user moves focus into a cell, screen readers announce a label for the new column if the user has changed column, or for the new row if the user has changed row
    • -
    -

    - When labelling a column, either on the screen or aurally, implementations should use the first available of: -

    -
      -
    1. the column's titles in the preferred language of the user, or with an undefined language if there is no title available in a preferred language; there may be multiple such titles in which case all should be announced
    2. -
    3. the column's name
    4. -
    5. the column's number
    6. -
    -

    - When labelling a row, either on the screen or aurally, implementations should use the first available of: -

    -
      -
    1. the row's titles in the preferred language of the user, or with an undefined language if there is no title available in a preferred language; there may be multiple such titles in which case all should be announced
    2. -
    3. the values of the cells in the row's primary key
    4. -
    5. the row's number
    6. -
    -
    -
    -
    -

    6.6 Validating Tables

    -

    - Validators test whether given tabular data files adhere to the structure defined within a schema. Validators MUST raise errors (and halt processing) and issue warnings (and continue processing) as defined in [tabular-metadata]. In addition, validators MUST raise errors but MAY continue validating in the following situations: -

    - -
    -
    -

    6.7 Converting Tables

    -

    - Conversions of tabular data to other formats operate over an annotated table constructed as defined in Annotating Tables in [tabular-metadata]. The mechanics of these conversions to other formats are defined in other specifications such as [csv2json] and [csv2rdf]. -

    -

    - Conversion specifications MUST define a default mapping from an annotated table that lacks any annotations (i.e., that is equivalent to an un-annotated table). -

    -

    - Conversion specifications MUST use the property value of the propertyUrl of a column as the basis for naming machine-readable fields in the target format, such as the name of the equivalent element or attribute in XML, property in JSON or property URI in RDF. -

    -

    - Conversion specifications MAY use any of the annotations found on an annotated table group, table, column, row or cell, including non-core annotations, to adjust the mapping into another format. -

    -

    - Conversion specifications MAY define additional annotations, not defined in this specification, which are specifically used when converting to the target format of the conversion. For example, a conversion to XML might specify a http://example.org/conversion/xml/element-or-attribute property on columns that determines whether a particular column is represented through an element or an attribute in the data. -

    -
    -
    -
    -

    7. Best Practice CSV

    This section is non-normative.

    -

    - There is no standard for CSV, and there are many variants of CSV used on the web today. This section defines a method for expressing tabular data adhering to the annotated tabular data model in CSV. Authors are encouraged to adhere to the constraints described in this section as implementations should process such CSV files consistently. -

    -
    Note
    -

    - This syntax is not compliant with text/csv as defined in [RFC4180] in that it permits line endings other than CRLF. Supporting LF line endings is important for data formats that are used on non-Windows platforms. However, all files that adhere to [RFC4180]'s definition of CSV meet the constraints described in this section. -

    -

    - Developing a standard for CSV is outside the scope of the Working Group. The details here aim to help shape any future standard. -

    -
    -
    -

    7.1 Content Type

    -

    - The appropriate content type for a CSV file is text/csv. For example, when a CSV file is transmitted via HTTP, the HTTP response should include a Content-Type header with the value text/csv: -

    -
    Content-Type: text/csv
    -        
    -
    -
    -

    7.2 Encoding

    -

    - CSV files should be encoded using UTF-8, and should be in Unicode Normal Form C as defined in [UAX15]. If a CSV file is not encoded using UTF-8, the encoding should be specified through the charset parameter in the Content-Type header: -

    -
    Content-Type: text/csv;charset=ISO-8859-1
    -        
    -
    -
    -

    7.3 Line Endings

    -

    - The ends of rows in a CSV file should be CRLF (U+000D U+000A) but may be LF (U+000A). Line endings within escaped cells are not normalised. -

    -
    -
    -

    7.4 Lines

    -

    - Each line of a CSV file should contain the same number of comma-separated values. -

    -

    - Values that contain commas, line endings, or double quotes should be escaped by having the entire value wrapped in double quotes. There should not be whitespace before or after the double quotes. Within these escaped cells, any double quotes should be escaped with two double quotes (""). -

    -
    -

    7.4.1 Headers

    -

    - The first line of a CSV file should contain a comma-separated list of names of columns. This is known as the header line and provides titles for the columns. There are no constraints on these titles. -

    -

    - If a CSV file does not include a header line, this should be specified using the header parameter of the media type: -

    -
    Content-Type: text/csv;header=absent
    -          
    -
    -
    - -
    -

    7.5 Grammar

    -

    This grammar is a generalization of that defined in [RFC4180] and is included for reference only.

    -

    The EBNF used here is defined in XML 1.0 [EBNF-NOTATION].

    -
    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    [1]csv::= - header - record+ -
    [2]header::= - record -
    [3]record::= - fields - #x0D? - #x0A -
    [4]fields::= - field - ("," fields)* -
    [5]field::= - WS* - rawfield - WS* -
    [6]rawfield::= - '"' QCHAR* '"' - |SCHAR* -
    [7]QCHAR::= - [^"] - |'""' -
    [8]SCHAR::= - [^",#x0A#x0D] -
    [9]WS::= - [#x20#x09] -
    -
    -
    -
    -
    -

    8. Parsing Tabular Data

    This section is non-normative.

    -

    - As described in section 7. Best Practice CSV, there may be many formats which an application might interpret into the tabular data model described in section 4. Tabular Data Models, including using different separators or fixed format tables, multiple tables within a single file, or ones that have metadata lines before a table header. -

    -
    Note

    - Standardizing the parsing of CSV is outside the chartered scope of the Working Group. This non-normative section is intended to help the creators of parsers handle the wide variety of CSV-based formats that they may encounter due to the current lack of standardization of the format. -

    -

    - This section describes an algorithm for parsing formats that do not adhere to the constraints described in section 7. Best Practice CSV, as well as those that do, and extracting embedded metadata. The parsing algorithm uses the following flags. These may be set by metadata properties found while Locating Metadata, including through user input (see Overriding Metadata), or through the inclusion of a dialect description within a metadata file: -

    -
    -
    comment prefix
    -
    A string that, when it appears at the beginning of a row, indicates that the row is a comment that should be associated as an rdfs:comment annotation to the table. This is set by the commentPrefix property of a dialect description. The default is null, which means no rows are treated as comments. A value other than null may mean that the source numbers of rows are different from their numbers.
    -
    delimiter
    -
    The separator between cells, set by the delimiter property of a dialect description. The default is ,.
    -
    encoding
    -
    The character encoding for the file, one of the encodings listed in [encoding], set by the encoding property of a dialect description. The default is utf-8.
    -
    escape character
    -
    The string that is used to escape the quote character within escaped cells, or null, set by the doubleQuote property of a dialect description. The default is " (such that "" is used to escape " within an escaped cell).
    -
    header row count
    -
    The number of header rows (following the skipped rows) in the file, set by the header or headerRowCount property of a dialect description. The default is 1. A value other than 0 will mean that the source numbers of rows will be different from their numbers.
    -
    line terminators
    -
    The strings that can be used at the end of a row, set by the lineTerminators property of a dialect description. The default is [CRLF, LF].
    -
    quote character
    -
    The string that is used around escaped cells, or null, set by the quoteChar property of a dialect description. The default is ".
    -
    skip blank rows
    -
    Indicates whether to ignore wholly empty rows (i.e. rows in which all the cells are empty), set by the skipBlankRows property of a dialect description. The default is false. A value other than false may mean that the source numbers of rows are different from their numbers.
    -
    skip columns
    -
    The number of columns to skip at the beginning of each row, set by the skipColumns property of a dialect description. The default is 0. A value other than 0 will mean that the source numbers of columns will be different from their numbers.
    -
    skip rows
    -
    The number of rows to skip at the beginning of the file, before a header row or tabular data, set by the skipRows property of a dialect description. The default is 0. A value greater than 0 will mean that the source numbers of rows will be different from their numbers.
    -
    trim
    -
    Indicates whether to trim whitespace around cells; may be true, false, start, or end, set by the skipInitialSpace or trim property of a dialect description. The default is true.
    -
    -

    - The algorithm for using these flags to parse a document containing tabular data to create a basic annotated tabular data model and to extract embedded metadata is as follows: -

    -
      -
    1. - Create a new table T with the annotations: - -
    2. -
    3. - Create a metadata document structure M that looks like: -
      {
      -  "@context": "http://www.w3.org/ns/csvw",
      -  "rdfs:comment": [],
      -  "tableSchema": {
      -    "columns": []
      -  }
      -}
      -          
      -
    4. -
    5. - If the URL of the tabular data file being parsed is known, set the url property on M to that URL. -
    6. -
    7. - Set source row number to 1. -
    8. -
    9. -

      - Read the file using the encoding, as specified in [encoding], using the replacement error mode. If the encoding is not a Unicode encoding, use a normalizing transcoder to normalize into Unicode Normal Form C as defined in [UAX15]. -

      -
      Note

      - The replacement error mode ensures that any non-Unicode characters within the CSV file are replaced by U+FFFD, ensuring that strings within the tabular data model such as column titles and cell string values only contain valid Unicode characters. -

      -
    10. -
    11. - Repeat the following the number of times indicated by skip rows: -
        -
      1. Read a row to provide the row content.
      2. -
      3. If the comment prefix is not null and the row content begins with the comment prefix, strip that prefix from the row content, and add the resulting string to the M.rdfs:comment array.
      4. -
      5. Otherwise, if the row content is not an empty string, add the row content to the M.rdfs:comment array.
      6. -
      7. Add 1 to the source row number.
      8. -
      -
    12. -
    13. - Repeat the following the number of times indicated by header row count: -
        -
      1. Read a row to provide the row content.
      2. -
      3. If the comment prefix is not null and the row content begins with the comment prefix, strip that prefix from the row content, and add the resulting string to the M.rdfs:comment array.
      4. -
      5. Otherwise, parse the row to provide a list of cell values, and: -
          -
        1. Remove the first skip columns number of values from the list of cell values.
        2. -
        3. For each of the remaining values at index i in the list of cell values: -
            -
          1. If the value at index i in the list of cell values is an empty string or consists only of whitespace, do nothing.
          2. -
          3. Otherwise, if there is no column description object at index i in M.tableSchema.columns, create a new one with a titles property whose value is an array containing a single value that is the value at index i in the list of cell values.
          4. -
          5. Otherwise, add the value at index i in the list of cell values to the array at M.tableSchema.columns[i].titles.
          6. -
          -
        4. -
        -
      6. -
      7. Add 1 to the source row number.
      8. -
      -
    14. -
    15. - If header row count is zero, create an empty column description object in M.tableSchema.columns for each column in the current row after skip columns. -
    16. -
    17. Set row number to 1.
    18. -
    19. - While it is possible to read another row, do the following: -
        -
      1. Set the source column number to 1.
      2. -
      3. Read a row to provide the row content.
      4. -
      5. If the comment prefix is not null and the row content begins with the comment prefix, strip that prefix from the row content, and add the resulting string to the M.rdfs:comment array.
      6. -
      7. Otherwise, parse the row to provide a list of cell values, and: -
          -
        1. If all of the values in the list of cell values are empty strings, and skip blank rows is true, add 1 to the source row number and move on to process the next row.
        2. -
        3. Otherwise, create a new row R, with: - -
        4. -
        5. Append R to the rows of table T.
        6. -
        7. Remove the first skip columns number of values from the list of cell values and add that number to the source column number.
        8. -
        9. For each of the remaining values at index i in the list of cell values (where i starts at 1): -
            -
          1. Identify the column C at index i within the columns of table T. If there is no such column: -
              -
            1. Create a new column C with: - -
            2. -
            3. Append C to the columns of table T (at index i).
            4. -
            -
          2. -
          3. Create a new cell D, with: - -
          4. -
          5. Append cell D to the cells of column C.
          6. -
          7. Append cell D to the cells of row R (at index i).
          8. -
          9. Add 1 to the source column number.
          10. -
          -
        10. -
        -
      8. -
      9. Add 1 to the source row number.
      10. -
      -
    20. -
    21. If M.rdfs:comment is an empty array, remove the rdfs:comment property from M.
    22. -
    23. Return the table T and the embedded metadata M.
    24. -
    -

    - To read a row to provide row content, perform the following steps: -

    -
      -
    1. Set the row content to an empty string.
    2. -
    3. Read initial characters and process as follows: -
        -
      1. If the string starts with the escape character followed by the quote character, append both strings to the row content, and move on to process the string following the quote character.
      2. -
      3. Otherwise, if the string starts with the escape character and the escape character is not the same as the quote character, append the escape character and the single character following it to the row content and move on to process the string following that character.
      4. -
      5. Otherwise, if the string starts with the quote character, append the quoted value obtained by reading a quoted value to the row content and move on to process the string following the quoted value.
      6. -
      7. Otherwise, if the string starts with one of the line terminators, return the row content.
      8. -
      9. Otherwise, append the first character to the row content and move on to process the string following that character.
      10. -
      -
    4. -
    5. If there are no more characters to read, return the row content.
    6. -
    -

    - To read a quoted value to provide a quoted value, perform the following steps: -

    -
      -
    1. Set the quoted value to an empty string.
    2. -
    3. Read the initial quote character and add a quote character to the quoted value.
    4. -
    5. Read initial characters and process as follows: -
        -
      1. If the string starts with the escape character followed by the quote character, append both strings to the quoted value, and move on to process the string following the quote character.
      2. -
      3. Otherwise, if the string starts with the escape character and the escape character is not the same as the quote character, append the escape character and the character following it to the quoted value and move on to process the string following that character.
      4. -
      5. Otherwise, if the string starts with the quote character, return the quoted value.
      6. -
      7. Otherwise, append the first character to the quoted value and move on to process the string following that character.
      8. -
      -
    6. -
    -

    - To parse a row to provide a list of cell values, perform the following steps: -

    -
      -
    1. Set the list of cell values to an empty list and the current cell value to an empty string.
    2. -
    3. Set the quoted flag to false.
    4. -
    5. Read initial characters and process as follows: -
        -
      1. If the string starts with the escape character followed by the quote character, append the quote character to the current cell value, and move on to process the string following the quote character.
      2. -
      3. Otherwise, if the string starts with the escape character and the escape character is not the same as the quote character, append the character following the escape character to the current cell value and move on to process the string following that character.
      4. -
      5. Otherwise, if the string starts with the quote character then: -
          -
        1. If quoted is false, set the quoted flag to true, and move on to process the remaining string. If the current cell value is not an empty string, raise an error.
        2. -
        3. Otherwise, set quoted to false, and move on to process the remaining string. If the remaining string does not start with the delimiter, raise an error.
        4. -
        -
      6. -
      7. Otherwise, if the string starts with the delimiter, then: -
          -
        1. If quoted is true, append the delimiter string to the current cell value and move on to process the remaining string.
        2. -
        3. Otherwise, conditionally trim the current cell value, add the resulting trimmed cell value to the list of cell values and move on to process the following string.
        4. -
        -
      8. -
      9. Otherwise, append the first character to the current cell value and move on to process the remaining string.
      10. -
      -
    6. -
    7. If there are no more characters to read, conditionally trim the current cell value, add the resulting trimmed cell value to the list of cell values and return the list of cell values.
    8. -
    -

    - To conditionally trim a cell value to provide a trimmed cell value, perform the following steps:

    -
      -
    1. Set the trimmed cell value to the provided cell value.
    2. -
    3. If trim is true or start then remove any leading whitespace from the start of the trimmed cell value and move on to the next step.
    4. -
    5. If trim is true or end then remove any trailing whitespace from the end of the trimmed cell value and move on to the next step.
    6. -
    7. Return the trimmed cell value.
    8. -
    -
    Note

    - This parsing algorithm does not account for the possibility of there being more than one area of tabular data within a single CSV file. -

    -
    -

    8.1 Bidirectionality in CSV Files

    This section is non-normative.

    -

    - Bidirectional content does not alter the definition of rows or the assignment of cells to columns. Whether or not a CSV file contains right-to-left characters, the first column's content is the first cell of each row, which is the text prior to the first occurrence of a comma within that row. -

    -
    -

    - For example, Egyptian Referendum results are available as a CSV file at https://egelections-2011.appspot.com/Referendum2012/results/csv/EG.csv. Over the wire and in non-Unicode-aware text editors, the CSV looks like: -

    -
                
    -‌ا‌ل‌م‌ح‌ا‌ف‌ظ‌ة‌,‌ن‌س‌ب‌ة‌ ‌م‌و‌ا‌ف‌ق‌,‌ن‌س‌ب‌ة‌ ‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌,‌ع‌د‌د‌ ‌ا‌ل‌ن‌ا‌خ‌ب‌ي‌ن‌,‌ا‌ل‌أ‌ص‌و‌ا‌ت‌ ‌ا‌ل‌ص‌ح‌ي‌ح‌ة‌,‌ا‌ل‌أ‌ص‌و‌ا‌ت‌ ‌ا‌ل‌ب‌ا‌ط‌ل‌ة‌,‌ن‌س‌ب‌ة‌ ‌ا‌ل‌م‌ش‌ا‌ر‌ك‌ة‌,‌م‌و‌ا‌ف‌ق‌,‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌
    -‌ا‌ل‌ق‌ل‌ي‌و‌ب‌ي‌ة‌,60.0,40.0,"2,639,808","853,125","15,224",32.9,"512,055","341,070"
    -‌ا‌ل‌ج‌ي‌ز‌ة‌,66.7,33.3,"4,383,701","1,493,092","24,105",34.6,"995,417","497,675"
    -‌ا‌ل‌ق‌ا‌ه‌ر‌ة‌,43.2,56.8,"6,580,478","2,254,698","36,342",34.8,"974,371","1,280,327"
    -‌ق‌ن‌ا‌,84.5,15.5,"1,629,713","364,509","6,743",22.8,"307,839","56,670"
    -...
    -            
    -          
    -

    - Within this CSV file, the first column appears as the content of each line before the first comma and is named المحافظة (appearing at the start of each row as ‌ا‌ل‌م‌ح‌ا‌ف‌ظ‌ة‌ in the example, which is displaying the relevant characters from left to right in the order they appear "on the wire"). -

    -

    - The CSV translates to a table model that looks like: -

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Column / Rowcolumn 1column 2column 3column 4column 5column 6column 7column 8column 9
    column namesالمحافظةنسبة موافقنسبة غير موافقعدد الناخبينالأصوات الصحيحةالأصوات الباطلةنسبة المشاركةموافقغير موافق
    row 1القليوبية60.040.02,639,808853,12515,22432.9512,055341,070
    row 2الجيزة66.733.34,383,7011,493,09224,10534.6995,417497,675
    row 3القاهرة43.256.86,580,4782,254,69836,34234.8974,3711,280,327
    row 4قنا84.515.51,629,713364,5096,74322.8307,83956,670
    -

    - The fragment identifier #col=3 identifies the third of the columns, named نسبة غير موافق (appearing as ‌ن‌س‌ب‌ة‌ ‌غ‌ي‌ر‌ ‌م‌و‌ا‌ف‌ق‌ in the example). -

    -

    - section 6.5.1 Bidirectional Tables defines how this table model should be displayed by compliant applications, and how metadata can affect the display. The default is for the display to be determined by the content of the table. For example, if this CSV were turned into an HTML table for display into a web page, it should be displayed with the first column on the right and the last on the left, as follows: -

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    غير موافقموافقنسبة المشاركةالأصوات الباطلةالأصوات الصحيحةعدد الناخبيننسبة غير موافقنسبة موافقالمحافظة
    341,070512,05532.915,224853,1252,639,80840.060.0القليوبية
    497,675995,41734.624,1051,493,0924,383,70133.366.7الجيزة
    1,280,327974,37134.836,3422,254,6986,580,47856.843.2القاهرة
    56,670307,83922.86,743364,5091,629,71315.584.5قنا
    -

    - The fragment identifier #col=3 still identifies the third of the columns, named نسبة غير موافق, which appears in the HTML display as the third column from the right and is what those who read right-to-left would think of as the third column. -

    -

    - Note that this display matches that shown on the original website. -

    -
    -
    -
    -

    8.2 Examples

    -
    -

    8.2.1 Simple Example

    -

    - A simple CSV file that complies with the constraints described in section 7. Best Practice CSV, at http://example.org/tree-ops.csv, might look like: -

    -
    Example 14: http://example.org/tree-ops.csv
    GID,On Street,Species,Trim Cycle,Inventory Date
    -1,ADDISON AV,Celtis australis,Large Tree Routine Prune,10/18/2010
    -2,EMERSON ST,Liquidambar styraciflua,Large Tree Routine Prune,6/2/2010
    -

    - Parsing this file results in an annotated tabular data model of a single table T with five columns and two rows. The columns have the annotations shown in the following table: -

    - - - - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercellstitles
    C1T11C1.1, C2.1GID
    C2T22C1.2, C2.2On Street
    C3T33C1.3, C2.3Species
    C4T44C1.4, C2.4Trim Cycle
    C5T55C1.5, C2.5Inventory Date
    -

    The extracted embedded metadata, as defined in [tabular-metadata], would look like:

    -
    Example 15: tree-ops.csv Embedded Metadata
    {
    -  "@type": "Table",
    -  "url": "http://example.org/tree-ops.csv",
    -  "tableSchema": {
    -    "columns": [
    -      {"titles": [ "GID" ]},
    -      {"titles": [ "On Street" ]},
    -      {"titles": [ "Species" ]},
    -      {"titles": [ "Trim Cycle" ]},
    -      {"titles": [ "Inventory Date" ]}
    -    ]
    -  }
    -}
    -

    - The rows have the annotations shown in the following table: -

    - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercells
    R1T12C1.1, C1.2, C1.3, C1.4, C1.5
    R2T23C2.1, C2.2, C2.3, C2.4, C2.5
    -
    Note

    - The source number of each row is offset by one from the number of each row because in the source CSV file, the header line is the first line. It is possible to reconstruct a [RFC7111] compliant reference to the first record in the original CSV file (http://example.org/tree-ops.csv#row=2) using the value of the row's source number. This enables implementations to retain provenance between the table model and the original file. -

    -

    - The cells have the annotations shown in the following table (note that the values of all the cells in the table are strings, denoted by the double quotes in the table below): -

    - - - - - - - - - - - - - - - - - -
    idcore annotations
    tablecolumnrowstring valuevalue
    C1.1TC1R1"1""1"
    C1.2TC2R1"ADDISON AV""ADDISON AV"
    C1.3TC3R1"Celtis australis""Celtis australis"
    C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"
    C1.5TC5R1"10/18/2010""10/18/2010"
    C2.1TC1R2"2""2"
    C2.2TC2R2"EMERSON ST""EMERSON ST"
    C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"
    C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"
    C2.5TC5R2"6/2/2010""6/2/2010"
    -
    -
    8.2.1.1 Using Overriding Metadata
    -

    - The tools that the consumer of this data uses may provide a mechanism for overriding the metadata that has been provided within the file itself. For example, they might enable the consumer to add machine-readable names to the columns, or to mark the fifth column as holding a date in the format M/D/YYYY. These facilities are implementation defined; the code for invoking a Javascript-based parser might look like: -

    -
    Example 16: Javascript implementation configuration
    data.parse({
    -  "column-names": ["GID", "on_street", "species", "trim_cycle", "inventory_date"],
    -  "datatypes": ["string", "string", "string", "string", "date"],
    -  "formats": [null,null,null,null,"M/D/YYYY"]
    -});
    -

    - This is equivalent to a metadata file expressed in the syntax defined in [tabular-metadata], looking like: -

    -
    Example 17: Equivalent metadata syntax
    {
    -  "@type": "Table",
    -  "url": "http://example.org/tree-ops.csv",
    -  "tableSchema": {
    -    "columns": [{
    -      "name": "GID",
    -      "datatype": "string"
    -    }, {
    -      "name": "on_street",
    -      "datatype": "string"
    -    }, {
    -      "name": "species",
    -      "datatype": "string"
    -    }, {
    -      "name": "trim_cycle",
    -      "datatype": "string"
    -    }, {
    -      "name": "inventory_date",
    -      "datatype": {
    -        "base": "date",
    -        "format": "M/d/yyyy"
    -      }
    -    }]
    -  }
    -}
    -

    - This would be merged with the embedded metadata found in the CSV file, providing the titles for the columns to create: -

    -
    Example 18: Merged metadata
    {
    -  "@type": "Table",
    -  "url": "http://example.org/tree-ops.csv",
    -  "tableSchema": {
    -    "columns": [{
    -      "name": "GID",
    -      "titles": "GID",
    -      "datatype": "string"
    -    }, {
    -      "name": "on_street",
    -      "titles": "On Street",
    -      "datatype": "string"
    -    }, {
    -      "name": "species",
    -      "titles": "Species",
    -      "datatype": "string"
    -    }, {
    -      "name": "trim_cycle",
    -      "titles": "Trim Cycle",
    -      "datatype": "string"
    -    }, {
    -      "name": "inventory_date",
    -      "titles": "Inventory Date",
    -      "datatype": {
    -        "base": "date",
    -        "format": "M/d/yyyy"
    -      }
    -    }]
    -  }
    -}
    -

    - The processor can then create an annotated tabular data model that includes name annotations on the columns and datatype annotations on the cells, and creates cells whose values are of appropriate types (in the case of this Javascript implementation, the cells in the last column would be Date objects, for example). -

    -

    - Assuming this kind of implementation-defined parsing, the columns would then have the annotations shown in the following table: -

    - - - - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercellsnametitlesdatatype
    C1T11C1.1, C2.1GIDGIDstring
    C2T22C1.2, C2.2on_streetOn Streetstring
    C3T33C1.3, C2.3speciesSpeciesstring
    C4T44C1.4, C2.4trim_cycleTrim Cyclestring
    C5T55C1.5, C2.5inventory_dateInventory Date{ "base": "date", "format": "M/d/yyyy" }
    -

    - The cells have the annotations shown in the following table. Because of the overrides provided by the consumer to guide the parsing, and the way the parser works, the cells in the Inventory Date column (cells C1.5 and C2.5) have values that are parsed dates rather than unparsed strings. -

    - - - - - - - - - - - - - - - - - -
    idcore annotations
    tablecolumnrowstring valuevalue
    C1.1TC1R1"1""1"
    C1.2TC2R1"ADDISON AV""ADDISON AV"
    C1.3TC3R1"Celtis australis""Celtis australis"
    C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"
    C1.5TC5R1"10/18/2010"2010-10-18
    C2.1TC1R2"2""2"
    C2.2TC2R2"EMERSON ST""EMERSON ST"
    C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"
    C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"
    C2.5TC5R2"6/2/2010"2010-06-02
    -
    -
    -
    8.2.1.2 Using a Metadata File
    -

    - A similar set of annotations could be provided through a metadata file, located as discussed in section 5. Locating Metadata and defined in [tabular-metadata]. For example, this might look like: -

    -
    Example 19: http://example.org/tree-ops.csv-metadata.json
    {
    -  "@context": ["http://www.w3.org/ns/csvw", {"@language": "en"}],
    -  "url": "tree-ops.csv",
    -  "dc:title": "Tree Operations",
    -  "dcat:keyword": ["tree", "street", "maintenance"],
    -  "dc:publisher": {
    -    "schema:name": "Example Municipality",
    -    "schema:url": {"@id": "http://example.org"}
    -  },
    -  "dc:license": {"@id": "http://opendefinition.org/licenses/cc-by/"},
    -  "dc:modified": {"@value": "2010-12-31", "@type": "xsd:date"},
    -  "tableSchema": {
    -    "columns": [{
    -      "name": "GID",
    -      "titles": ["GID", "Generic Identifier"],
    -      "dc:description": "An identifier for the operation on a tree.",
    -      "datatype": "string",
    -      "required": true
    -    }, {
    -      "name": "on_street",
    -      "titles": "On Street",
    -      "dc:description": "The street that the tree is on.",
    -      "datatype": "string"
    -    }, {
    -      "name": "species",
    -      "titles": "Species",
    -      "dc:description": "The species of the tree.",
    -      "datatype": "string"
    -    }, {
    -      "name": "trim_cycle",
    -      "titles": "Trim Cycle",
    -      "dc:description": "The operation performed on the tree.",
    -      "datatype": "string"
    -    }, {
    -      "name": "inventory_date",
    -      "titles": "Inventory Date",
    -      "dc:description": "The date of the operation that was performed.",
    -      "datatype": {"base": "date", "format": "M/d/yyyy"}
    -    }],
    -    "primaryKey": "GID",
    -    "aboutUrl": "#gid-{GID}"
    -  }
    -}
    -

    - The annotated tabular data model generated from this would be more sophisticated again. The table itself would have the following annotations: -

    -
    -
    dc:title
    -
    {"@value": "Tree Operations", "@language": "en"}
    -
    dcat:keyword
    -
    [{"@value": "tree", "@language": "en"}, {"@value": "street", "@language": "en"}, {"@value": "maintenance", "@language": "en"}]
    -
    dc:publisher
    -
    [{ "schema:name": "Example Municipality", "schema:url": {"@id": "http://example.org"} }]
    -
    dc:license
    -
    {"@id": "http://opendefinition.org/licenses/cc-by/"}
    -
    dc:modified
    -
    {"@value": "2010-12-31", "@type": "date"}
    -
    -

    - The columns would have the annotations shown in the following table: -

    - - - - - - - - - - - - -
    idcore annotationsother annotations
    tablenumbersource numbercellsnametitlesdatatypedc:description
    C1T11C1.1, C2.1GIDGID, Generic IdentifierstringAn identifier for the operation on a tree.
    C2T22C1.2, C2.2on_streetOn StreetstringThe street that the tree is on.
    C3T33C1.3, C2.3speciesSpeciesstringThe species of the tree.
    C4T44C1.4, C2.4trim_cycleTrim CyclestringThe operation performed on the tree.
    C5T55C1.5, C2.5inventory_dateInventory Date{ "base": "date", "format": "M/d/yyyy" }The date of the operation that was performed.
    -

    - The rows have an additional primary key annotation, as shown in the following table: -

    - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercellsprimary key
    R1T12C1.1, C1.2, C1.3, C1.4, C1.5C1.1
    R2T23C2.1, C2.2, C2.3, C2.4, C2.5C2.1
    -

    - Thanks to the provided metadata, the cells again have the annotations shown in the following table. The metadata file has provided the information needed to supplement the model with additional annotations and also, for the Inventory Date column (cells C1.5 and C2.5), to give each cell a value that is a parsed date rather than an unparsed string. -

    - - - - - - - - - - - - - - - - - -
    idcore annotations
    tablecolumnrowstring valuevalueabout URL
    C1.1TC1R1"1""1"http://example.org/tree-ops.csv#gid-1
    C1.2TC2R1"ADDISON AV""ADDISON AV"http://example.org/tree-ops.csv#gid-1
    C1.3TC3R1"Celtis australis""Celtis australis"http://example.org/tree-ops.csv#gid-1
    C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"http://example.org/tree-ops.csv#gid-1
    C1.5TC5R1"10/18/2010"2010-10-18http://example.org/tree-ops.csv#gid-1
    C2.1TC1R2"2""2"http://example.org/tree-ops.csv#gid-2
    C2.2TC2R2"EMERSON ST""EMERSON ST"http://example.org/tree-ops.csv#gid-2
    C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"http://example.org/tree-ops.csv#gid-2
    C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"http://example.org/tree-ops.csv#gid-2
    C2.5TC5R2"6/2/2010"2010-06-02http://example.org/tree-ops.csv#gid-2
    -
    -
    -
    -

    8.2.2 Empty and Quoted Cells

    -

    - The following slightly amended CSV file contains quoted and missing cell values: -

    -
    Example 20: CSV file containing quoted and missing cell values
    GID,On Street,Species,Trim Cycle,Inventory Date
    -1,ADDISON AV,"Celtis australis","Large Tree Routine Prune",10/18/2010
    -2,,"Liquidambar styraciflua","Large Tree Routine Prune",
    -

    - Parsing this file similarly results in an annotated tabular data model of a single table T with five columns and two rows. The columns and rows have exactly the same annotations as previously, but there are two null cell values for C2.2 and C2.5. Note that the quoting of values within the CSV makes no difference to either the string value or value of the cell. -

    - - - - - - - - - - - - - - - - - -
    idcore annotations
    tablecolumnrowstring valuevalue
    C1.1TC1R1"1""1"
    C1.2TC2R1"ADDISON AV""ADDISON AV"
    C1.3TC3R1"Celtis australis""Celtis australis"
    C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"
    C1.5TC5R1"10/18/2010""10/18/2010"
    C2.1TC1R2"2""2"
    C2.2TC2R2""null
    C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"
    C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"
    C2.5TC5R2""null
    -
    -
    -

    8.2.3 Tabular Data Embedding Annotations

    -

    - The following example illustrates some of the complexities that can be involved in parsing tabular data, how the flags described above can be used, and how new tabular data formats could be defined that embed additional annotations into the tabular data model. -

    -

    - In this example, the publishers of the data are using an internal convention to supply additional metadata about the tabular data embedded within the file itself. They are also using a tab as a separator rather than a comma. -

    -
    Example 21: Tab-separated file containing embedded metadata
    #	publisher	City of Palo Alto
    -#	updated	12/31/2010
    -#name	GID	on_street	species	trim_cycle	inventory_date
    -#datatype	string	string	string	string	date:M/D/YYYY
    -	GID	On Street	Species	Trim Cycle	Inventory Date
    -	1	ADDISON AV	Celtis australis	Large Tree Routine Prune	10/18/2010
    -	2	EMERSON ST	Liquidambar styraciflua	Large Tree Routine Prune	6/2/2010
    -
    -
    8.2.3.1 Naive Parsing
    -

    - Naive parsing of the above data will assume a comma separator and thus results in a single table T with a single column and six rows. The column has the annotations shown in the following table: -

    - - - - - - - - -
    idcore annotations
    tablenumbersource numbercellstitles
    C1T11C1.1, C2.1, C3.1, C4.1, C5.1, C6.1# publisher City of Palo Alto
    -

    - The rows have the annotations shown in the following table: -

    - - - - - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercells
    R1T12C1.1
    R2T23C2.1
    R3T34C3.1
    R4T45C4.1
    R5T56C5.1
    R6T67C6.1
    -

    - The cells have the annotations shown in the following table (note that the values of all the cells in the table are strings, denoted by the double quotes in the table below): -

    - - - - - - - - - - - - - -
    idcore annotations
    tablecolumnrowstring valuevalue
    C1.1TC1R1"# updated 12/31/2010""# updated 12/31/2010"
    C2.1TC1R2"#name GID on_street species trim_cycle inventory_date""#name GID on_street species trim_cycle inventory_date"
    C3.1TC1R3"#datatype string string string string date:M/D/YYYY""#datatype string string string string date:M/D/YYYY"
    C4.1TC1R4" GID On Street Species Trim Cycle Inventory Date"" GID On Street Species Trim Cycle Inventory Date"
    C5.1TC1R5" 1 ADDISON AV Celtis australis Large Tree Routine Prune 10/18/2010"" 1 ADDISON AV Celtis australis Large Tree Routine Prune 10/18/2010"
    C6.1TC1R6" 2 EMERSON ST Liquidambar styraciflua Large Tree Routine Prune 6/2/2010"" 2 EMERSON ST Liquidambar styraciflua Large Tree Routine Prune 6/2/2010"
    -
    -
    -
    8.2.3.2 Parsing with Flags
    -

    - The consumer of the data may use the flags described above to create a more useful set of data from this file. Specifically, they could set: -

    - -

    - Setting these is done in an implementation-defined way. It could be done, for example, by sniffing the contents of the file itself, through command-line options, or by embedding a dialect description into a metadata file associated with the tabular data, which would look like: -

    -
    Example 22: Dialect description
    {
    -  "delimiter": "\t",
    -  "skipRows": 4,
    -  "skipColumns": 1,
    -  "commentPrefix": "#"
    -}
    -

    - With these flags in operation, parsing this file results in an annotated tabular data model of a single table T with five columns and two rows which is largely the same as that created from the original simple example described in section 8.2.1 Simple Example. There are three differences. -

    -

    - First, because the four skipped rows began with the comment prefix, the table itself now has four rdfs:comment annotations, with the values: -

    -
      -
    1. publisher City of Palo Alto
    2. -
    3. updated 12/31/2010
    4. -
    5. name GID on_street species trim_cycle inventory_date
    6. -
    7. datatype string string string string date:M/D/YYYY
    8. -
    -

    - Second, because the first column has been skipped, the source number of each of the columns is offset by one from the number of each column: -

    - - - - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercellstitles
    C1T12C1.1, C2.1GID
    C2T23C1.2, C2.2On Street
    C3T34C1.3, C2.3Species
    C4T45C1.4, C2.4Trim Cycle
    C5T56C1.5, C2.5Inventory Date
    -

    - Finally, because four additional rows have been skipped, the source number of each of the rows is offset by five from the row number (the four skipped rows plus the single header row): -

    - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercells
    R1T16C1.1, C1.2, C1.3, C1.4, C1.5
    R2T27C2.1, C2.2, C2.3, C2.4, C2.5
    -
    -
    -
    8.2.3.3 Recognizing Tabular Data Formats
    -

    - The conventions used in this data (invented for the purpose of this example) are in fact intended to create an annotated tabular data model which includes named annotations on the table itself, on the columns, and on the cells. The creator of these conventions could create a specification for this particular tabular data syntax and register a media type for it. The specification would include statements like: -

    -
      -
    • A tab delimiter is always used.
    • -
    • The first column is always ignored.
    • -
    • When the first column of a row has the value "#", the second column is the name of an annotation on the table and the values of the remaining columns are concatenated to create the value of that annotation.
    • -
    • When the first column of a row has the value #name, the remaining cells in the row provide a name annotation for each column in the table.
    • -
    • When the first column of a row has the value #datatype, the remaining cells in the row provide datatype/format annotations for the cells within the relevant column, and these are interpreted to create the value for each cell in that column.
    • -
    • The first row where the first column is empty is a row of headers; these provide title annotations on the columns in the table.
    • -
    • The remaining rows make up the data of the table.
    • -
    -

    - Parsers that recognized the format could then build a more sophisticated annotated tabular data model using only the embedded information in the tabular data file. They would extract embedded metadata looking like: -

    -
    Example 23: Embedded metadata in the format of the annotated tabular model
    {
    -  "@context": "http://www.w3.org/ns/csvw",
    -  "url": "tree-ops.csv",
    -  "dc:publisher": "City of Palo Alto",
    -  "dc:updated": "12/31/2010",
    -  "tableSchema": {
    -    "columns": [{
    -      "name": "GID",
    -      "titles": "GID",
    -      "datatype": "string"
    -    }, {
    -      "name": "on_street",
    -      "titles": "On Street",
    -      "datatype": "string"
    -    }, {
    -      "name": "species",
    -      "titles": "Species",
    -      "datatype": "string"
    -    }, {
    -      "name": "trim_cycle",
    -      "titles": "Trim Cycle",
    -      "datatype": "string"
    -    }, {
    -      "name": "inventory_date",
    -      "titles": "Inventory Date",
    -      "datatype": {
    -        "base": "date",
    -        "format": "M/d/yyyy"
    -      }
    -    }]
    -  }
    -}
    -

    - As before, the result would be a single table T with five columns and two rows. The table itself would have two annotations: -

    -
    -
    dc:publisher
    -
    {"@value": "City of Palo Alto"}
    -
    dc:updated
    -
    {"@value": "12/31/2010"}
    -
    -

    - The columns have the annotations shown in the following table: -

    - - - - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercellsnametitles
    C1T12C1.1, C2.1GIDGID
    C2T23C1.2, C2.2on_streetOn Street
    C3T34C1.3, C2.3speciesSpecies
    C4T45C1.4, C2.4trim_cycleTrim Cycle
    C5T56C1.5, C2.5inventory_dateInventory Date
    -

    - The rows have the annotations shown in the following table, exactly as in previous examples: -

    - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercells
    R1T16C1.1, C1.2, C1.3, C1.4, C1.5
    R2T27C2.1, C2.2, C2.3, C2.4, C2.5
    -

    - The cells have the annotations shown in the following table. Because of the way the particular tabular data format has been specified, these include additional annotations but also, for the Inventory Date column (cells C1.5 and C2.5), have a value that is a parsed date rather than an unparsed string. -

    - - - - - - - - - - - - - - - - - -
    idcore annotations
    tablecolumnrowstring valuevalue
    C1.1TC1R1"1""1"
    C1.2TC2R1"ADDISON AV""ADDISON AV"
    C1.3TC3R1"Celtis australis""Celtis australis"
    C1.4TC4R1"Large Tree Routine Prune""Large Tree Routine Prune"
    C1.5TC5R1"10/18/2010"2010-10-18
    C2.1TC1R2"2""2"
    C2.2TC2R2"EMERSON ST""EMERSON ST"
    C2.3TC3R2"Liquidambar styraciflua""Liquidambar styraciflua"
    C2.4TC4R2"Large Tree Routine Prune""Large Tree Routine Prune"
    C2.5TC5R2"6/2/2010"2010-06-02
    -
    -
    -
    -

    8.2.4 Parsing Multiple Header Lines

    -

    - The following example shows a CSV file with multiple header lines: -

    -
    Example 24: CSV file with multiple header lines
    Who,What,,Where,
    -Organization,Sector,Subsector,Department,Municipality
    -#org,#sector,#subsector,#adm1,#adm2
    -UNICEF,Education,Teacher training,Chocó,Quidbó
    -UNICEF,Education,Teacher training,Chocó,Bojayá
    -

    - Here, the first line contains some grouping titles, which are not particularly helpful. The lines following those contain useful titles for the columns. Thus the appropriate configuration for a dialect description is: -

    -
    Example 25: Dialect description for multiple header lines
    {
    -  "skipRows": 1,
    -  "headerRowCount": 2
    -}
    -

    - With this configuration, the table model contains five columns, each of which has two titles, summarized in the following table: -

    - - - - - - - - - - - - -
    idcore annotations
    tablenumbersource numbercellstitles
    C1T11C1.1, C2.1Organization, #org
    C2T22C1.2, C2.2Sector, #sector
    C3T33C1.3, C2.3Subsector, #subsector
    C4T44C1.4, C2.4Department, #adm1
    C5T55C1.5, C2.5Municipality, #adm2
    -

    - As metadata, this would look like: -

    -
    Example 26: Extracted metadata
    {
    -  "tableSchema": {
    -    "columns": [
    -      { "titles": ["Organization", "#org"] },
    -      { "titles": ["Sector", "#sector"] },
    -      { "titles": ["Subsector", "#subsector"] },
    -      { "titles": ["Department", "#adm1"] },
    -      { "titles": ["Municipality", "#adm2"] }
    -    ]
    -  }
    -}
    -

    - A separate metadata file could contain just the second of each of these titles, for example: -

    -
    Example 27: Metadata file
    {
    -  "tableSchema": {
    -    "columns": [
    -      { "name": "org", "titles": "#org" },
    -      { "name": "sector", "titles": "#sector" },
    -      { "name": "subsector", "titles": "#subsector" },
    -      { "name": "adm1", "titles": "#adm1" },
    -      { "name": "adm2", "titles": "#adm2" }
    -    ]
    -  }
    -}
    -

    - This enables people from multiple jurisdictions to use the same tabular data structures without having to use exactly the same titles within their documents. -

    -
    -
    -
    -
    -

    A. IANA Considerations

    - /.well-known/csvm -
    -
    URI suffix:
    -
    csvm
    -
    Change controller:
    -
    W3C
    -
    Specification document(s):
    -
    This document, section 5.3 Default Locations and Site-wide Location Configuration
    -
    -
    -
    -

    B. Existing Standards

    This section is non-normative.

    -

    - This appendix outlines various ways in which CSV is defined. -

    -
    -

    B.1 RFC 4180

    -

    - [RFC4180] defines CSV with the following ABNF grammar: -

    -
    file = [header CRLF] record *(CRLF record) [CRLF]
    -header = name *(COMMA name)
    -record = field *(COMMA field)
    -name = field
    -field = (escaped / non-escaped)
    -escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE
    -non-escaped = *TEXTDATA
    -COMMA = %x2C
    -CR = %x0D
    -DQUOTE =  %x22
    -LF = %x0A
    -CRLF = CR LF
    -TEXTDATA =  %x20-21 / %x23-2B / %x2D-7E
    -        
    -

    - Of particular note here are: -

    -
      -
    • The production for TEXTDATA indicates that only non-control ASCII characters are permitted within a CSV file. This restriction is routinely ignored in practice, and is impractical on the international web.
    • -
    • Lines should be ended with CRLF. This makes it harder to produce CSV files on Unix-based systems where the usual line ending is LF.
    • -
    • The header line is optional; a header parameter on the media type indicates whether the header is present or not.
    • -
    • Fields may be escaped by wrapping them in double quotes; any double quotes within the field must be escaped with two double quotes ("").
    • -
    -
    -
    -

    B.2 Excel

    -

    - Excel is a common tool for both creating and reading CSV documents, and therefore the CSV that it produces is a de facto standard. -

    -
    Note

    - The following describes the behavior of Microsoft Excel for Mac 2011 with an English locale. Further testing is needed to see the behavior of Excel in other situations. -

    -
    -

    B.2.1 Saved CSV

    -

    - Excel generates CSV files encoded using Windows-1252 with LF line endings. Characters that cannot be represented within Windows-1252 are replaced by underscores. Only those cells that need escaping (e.g. because they contain commas or double quotes) are escaped, and double quotes are escaped with two double quotes. -

    -

    - Dates and numbers are formatted as displayed, which means that formatting can lead to information being lost or becoming inconsistent. -

    -
    -
    -

    B.2.2 Opened CSV

    -

    - When opening CSV files, Excel interprets CSV files saved in UTF-8 as being encoded as Windows-1252 (whether or not a BOM is present). It correctly deals with double quoted cells, except that it converts line breaks within cells into spaces. It understands CRLF as a line break. It detects dates (formatted as YYYY-MM-DD) and formats them in the default date formatting for files. -

    -
    -
    -

    B.2.3 Imported CSV

    -

    - Excel provides more control when importing CSV files into Excel. However, it does not properly understand UTF-8 (with or without BOM). It does however properly understand UTF-16 and can read non-ASCII characters from a UTF-16-encoded file. -

    -

    - A particular quirk in the importing of CSV is that if a cell contains a line break, the final double quote that escapes the cell will be included within it. -

    -
    -
    -

    B.2.4 Copied Tabular Data

    -

    - When tabular data is copied from Excel, it is copied in a tab-delimited format, with LF line breaks. -

    -
    -
    -
    -

    B.3 Google Spreadsheets

    -
    -

    B.3.1 Downloading CSV

    -

    - Downloaded CSV files are encoded in UTF-8, without a BOM, and with LF line endings. Dates and numbers are formatted as they appear within the spreadsheet. -

    -
    -
    -

    B.3.2 Importing CSV

    -

    - CSV files can be imported as UTF-8 (with or without BOM). CRLF line endings are correctly recognized. Dates are reformatted to the default date format on load. -

    -
    -
    -
    -

    B.4 CSV Files in a Tabular Data Package

    -

    - Tabular Data Packages place the following restrictions on CSV files: -

    -
    -

    As a starting point, CSV files included in a Tabular Data Package must conform to the RFC for CSV (4180 - Common Format and MIME Type for Comma-Separated Values (CSV) Files). In addition:

    - -
      -
    • File names MUST end with .csv.

    • -
    • Files MUST be encoded as UTF-8.

    • -
    • -

      Files MUST have a single header row. This row MUST be the first row in the file.

      -
        -
      • Terminology: each column in the CSV file is termed a field and its name is the string in that column in the header row.

      • -
      • The name MUST be unique amongst fields, MUST contain at least one character, and MUST conform to the character restrictions defined for the name property.

      • -
      -
    • -
    • Rows in the file MUST NOT contain more fields than are in the header row (though they may contain less).

    • -
    • Each file MUST have an entry in the tables array in the datapackage.json file.

    • -
    • The resource metadata MUST include a tableSchema attribute whose value MUST be a valid schema description.

    • -
    • All fields in the CSV files MUST be described in the schema description.

    • -
    - -

    CSV files generated by different applications often vary in their syntax, e.g. use of quoting characters, delimiters, etc. To encourage conformance, CSV files in a Tabular Data Package SHOULD:

    - -
      -
    • Use "," as field delimiters.
    • -
    • Use CRLF (U+000D U+000A) or LF (U+000A) as line terminators.
    • -
    - -

    If a CSV file does not follow these rules then its specific CSV dialect MUST be documented. The resource - hash for the resource in the datapackage.json descriptor MUST:

    - - - -

    Applications processing the CSV file SHOULD use the dialect of the CSV file to guide parsing.

    -
    -
    -
    Note
    -

    - To replicate the findings above, test files which include non-ASCII characters, double quotes, and line breaks within cells are: -

    - -
    -
    -
    -

    C. Acknowledgements

    This section is non-normative.

    -
    At the time of publication, the following individuals had participated in the Working Group, in the order of their first name: - Adam Retter, - Alf Eaton, - Anastasia Dimou, - Andy Seaborne, - Axel Polleres, - Christopher Gutteridge, - Dan Brickley, - Davide Ceolin, - Eric Stephan, - Erik Mannens, - Gregg Kellogg, - Ivan Herman, - Jeni Tennison, - Jeremy Tandy, - Jürgen Umbrich, - Rufus Pollock, - Stasinos Konstantopoulos, - William Ingram, and - Yakov Shafranovich. -
    -
    -
    -

    D. Changes from previous drafts

    -
    -

    D.1 Changes since the candidate recommendation of 16 July 2015

    -
      -
    • Use text/tab-separated-values instead of the un-registered text/tsv.
    • -
    • /.well-known/csvm has been registered at IANA
    • -
    -
    -
    -

    D.2 Changes since the working draft of 16 April 2015

    -
      -
    • Merging of metadata files has been removed as it was determined not to be necessary.
    • -
    • Embedded metadata now used for compatibility check only, or as metadata if no other is found.
    • -
    • The titles annotation has been added to rows, and a section added describing the way in which screen readers should announce rows and columns to users
    • -
    • A Datatype description may have an id annotation to reference an external datatype definition in XSD, OWL, or some other format.
    • -
    • Renamed the direction annotation to table direction.
    • -
    • The built-in locations for locating metadata files were removed in favor of a site-wide configuration file, which uses the original values for file-specific and directory-specific metadata locations as the default value. See section 5.3 Default Locations and Site-wide Location Configuration.
    • -
    • The pattern for numeric types is now a number format pattern rather than a regular expression.
    • -
    -
    -
    -

    D.3 Changes since the working draft of 08 January 2015

    -

    The document has undergone substantial changes since the last working draft. Below are some of the changes made:

    - -
    -
    - -

    E. References

    E.1 Normative references

    [BCP47]
    A. Phillips; M. Davis. Tags for Identifying Languages. September 2009. IETF Best Current Practice. URL: https://tools.ietf.org/html/bcp47 -
    [BIDI]
    Mark Davis; Aharon Lanin; Andrew Glass. Unicode Bidirectional Algorithm. 5 June 2014. Unicode Standard Annex #9. URL: http://www.unicode.org/reports/tr9/ -
    [ECMASCRIPT]
    ECMAScript Language Specification. URL: https://tc39.github.io/ecma262/ -
    [ISO8601]
    Representation of dates and times. International Organization for Standardization. 2004. ISO 8601:2004. URL: http://www.iso.org/iso/catalogue_detail?csnumber=40874 -
    [JSON-LD]
    Manu Sporny; Gregg Kellogg; Markus Lanthaler. JSON-LD 1.0. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/json-ld/ -
    [RFC2119]
    S. Bradner. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 -
    [RFC3968]
    G. Camarillo. The Internet Assigned Number Authority (IANA) Header Field Parameter Registry for the Session Initiation Protocol (SIP). December 2004. Best Current Practice. URL: https://tools.ietf.org/html/rfc3968 -
    [RFC4180]
    Y. Shafranovich. Common Format and MIME Type for Comma-Separated Values (CSV) Files. October 2005. Informational. URL: https://tools.ietf.org/html/rfc4180 -
    [RFC5785]
    M. Nottingham; E. Hammer-Lahav. Defining Well-Known Uniform Resource Identifiers (URIs). April 2010. Proposed Standard. URL: https://tools.ietf.org/html/rfc5785 -
    [UAX35]
    Mark Davis; CLDR committee members. Unicode Locale Data Markup Language (LDML). 15 March 2013. Unicode Standard Annex #35. URL: http://www.unicode.org/reports/tr35/tr35-31/tr35.html -
    [UNICODE]
    The Unicode Standard. URL: http://www.unicode.org/versions/latest/ -
    [URI-TEMPLATE]
    J. Gregorio; R. Fielding; M. Hadley; M. Nottingham; D. Orchard. URI Template. March 2012. Proposed Standard. URL: https://tools.ietf.org/html/rfc6570 -
    [tabular-metadata]
    Jeni Tennison; Gregg Kellogg. Metadata Vocabulary for Tabular Data. W3C Recommendation. URL: http://www.w3.org/TR/2015/REC-tabular-metadata-20151217/ -
    [xmlschema11-2]
    David Peterson; Sandy Gao; Ashok Malhotra; Michael Sperberg-McQueen; Henry Thompson; Paul V. Biron et al. W3C XML Schema Definition Language (XSD) 1.1 Part 2: Datatypes. 5 April 2012. W3C Recommendation. URL: http://www.w3.org/TR/xmlschema11-2/ -

    E.2 Informative references

    [EBNF-NOTATION]
    Tim Bray; Jean Paoli; C. Michael Sperberg-McQueen; Eve Maler; François Yergeau. EBNF Notation. W3C Recommendation. URL: http://www.w3.org/TR/xml/#sec-notation -
    [RFC7111]
    M. Hausenblas; E. Wilde; J. Tennison. URI Fragment Identifiers for the text/csv Media Type. January 2014. Informational. URL: https://tools.ietf.org/html/rfc7111 -
    [UAX15]
    Mark Davis; Ken Whistler. Unicode Normalization Forms. 31 August 2012. Unicode Standard Annex #15. URL: http://www.unicode.org/reports/tr15 -
    [annotation-model]
    Robert Sanderson; Paolo Ciccarese; Benjamin Young. Web Annotation Data Model. 15 October 2015. W3C Working Draft. URL: http://www.w3.org/TR/annotation-model/ -
    [csv2json]
    Jeremy Tandy; Ivan Herman. Generating JSON from Tabular Data on the Web. W3C Recommendation. URL: http://www.w3.org/TR/2015/REC-csv2json-20151217/ -
    [csv2rdf]
    Jeremy Tandy; Ivan Herman; Gregg Kellogg. Generating RDF from Tabular Data on the Web. W3C Recommendation. URL: http://www.w3.org/TR/2015/REC-csv2rdf-20151217/ -
    [encoding]
    Anne van Kesteren; Joshua Bell; Addison Phillips. Encoding. 20 October 2015. W3C Candidate Recommendation. URL: http://www.w3.org/TR/encoding/ -
    [vocab-data-cube]
    Richard Cyganiak; Dave Reynolds. The RDF Data Cube Vocabulary. 16 January 2014. W3C Recommendation. URL: http://www.w3.org/TR/vocab-data-cube/ -
    \ No newline at end of file + \ No newline at end of file diff --git a/test/docs/metadata/tracking-compliance.html b/test/docs/metadata/tracking-compliance.html index 03a1369c3..4f8c2d035 100644 --- a/test/docs/metadata/tracking-compliance.html +++ b/test/docs/metadata/tracking-compliance.html @@ -417,700 +417,8 @@

    Table of Contents

    - - -
    -

    1. Scope

    - -

    Do Not Track is designed to provide users with a simple mechanism to - express a preference to allow or limit online tracking. Complying - with the user's preference as described in this document includes limits on - the collection, retention and use of data collected as a third party - to user actions and the sharing of data not - permanently de-identified.

    - -

    This specification is intended for compliance with expressed user - preferences via user agents that (1) can access - the general browsable Web; (2) have a user interface that satisfies the - requirements in Determining - User Preference in the [TPE] specification; and, (3) can implement - all of the [TPE] specification, including the mechanisms for - communicating a tracking status, and the user-granted exception - mechanism.

    - -

    It is outside the scope of this specification to control short-term, - transient collection and use of data, so long as the data is not shared - with a third party and is not used to build a profile about a user or - otherwise alter an individual user’s experience outside the current network - interaction. For example, the contextual customization of ads shown as part - of the same network interaction is not restricted by a DNT:1 - signal.

    -
    - -
    -

    2. Definitions

    - -
    -

    2.1 User

    - -

    A user is a natural person who is making, or has made, use - of the Web.

    -
    - -
    -

    2.2 User Agent

    - -

    The term user agent refers to any of the various client - programs capable of initiating HTTP requests, including but not limited - to browsers, spiders (web-based robots), command-line tools, native - applications, and mobile apps [RFC7230].

    -
    - -
    -

    2.3 Network Interaction

    - -

    A network interaction is a single HTTP request and its - corresponding response(s): zero or more interim (1xx) responses and a - single final (2xx-5xx) response.

    -
    - -
    -

    2.4 User Action

    - -

    A user action is a deliberate action by the user, via - configuration, invocation, or selection, to initiate a network - interaction. Selection of a link, submission of a form, and reloading a - page are examples of user actions.

    -
    - -
    -

    2.5 Party

    - -

    A party is a natural person, a legal entity, or a set of - legal entities that share common owner(s), common controller(s), and a - group identity that is easily discoverable by a user. Common branding or - providing a list of affiliates that is available via a link from a - resource where a party describes DNT practices are examples of ways to - provide this discoverability.

    -
    - -
    -

    2.6 Service Provider

    - -

    Access to Web resources often involves multiple parties that might - process the data received in a network interaction. For example, domain - name services, network access points, content distribution networks, load - balancing services, security filters, cloud platforms, and - software-as-a-service providers might be a party to a given network - interaction because they are contracted by either the user or the - resource owner to provide the mechanisms for communication. Likewise, - additional parties might be engaged after a network interaction, such as - when services or contractors are used to perform specialized data - analysis or records retention.

    - -

    For the data received in a given network interaction, a service - provider is considered to be the same party as its - contractee if the service provider:

    - -
      -
    1. processes the data on behalf of the contractee;
    2. - -
    3. ensures that the data is only retained, accessed, and used as - directed by the contractee;
    4. - -
    5. has no independent right to use the data other than in a - permanently de-identified form (e.g., for monitoring service - integrity, load balancing, capacity planning, or billing); and, -
    6. - -
    7. has a contract in place with the contractee which is consistent - with the above limitations.
    8. -
    -
    - -
    -

    2.7 First Party

    - -

    With respect to a given user action, a first party is a - party with which the user intends to interact, via one or more network - interactions, as a result of making that action. Merely hovering over, - muting, pausing, or closing a given piece of content does not constitute - a user's intent to interact with another party.

    - -

    In some cases, a resource on the Web will be jointly controlled by two - or more distinct parties. Each of those parties is considered a first - party to a given user action if a user would reasonably expect to - communicate with all of them when accessing that resource. For example, - prominent co-branding on the resource might lead a user to expect that - multiple parties are responsible for the content or functionality.

    - -

    Network interactions related to a given user action may not constitute - intentional interaction when, for example, the user is unaware or only - transiently informed of redirection or framed content.

    -
    - -
    -

    2.8 Third Party

    - -

    For any data collected as a result of one or more network interactions - resulting from a user's action, a third party is any party - other than that user, a first party for that user action, or a service - provider acting on behalf of either that user or that first party.

    -
    - -
    -

    2.9 De-identification

    - -

    Data is permanently de-identified when there exists a high - level of confidence that no human subject of the data can be identified, - directly or indirectly (e.g., via association with an identifier, user - agent, or device), by that data alone or in combination with other - retained or available information.

    - -
    -

    2.9.1 De-identification Considerations

    This section is non-normative.

    - -

    In this specification the term permanently de-identified is - used for data that has passed out of the scope of this specification - and can not, and will never, come back into scope. The organization - that performs the de-identification needs to be confident that the data - can never again identify the human subjects whose activity contributed - to the data. That confidence may result from ensuring or demonstrating - that it is no longer possible to:

    - -
      -
    • isolate some or all records which correspond to a device or - user;
    • - -
    • link two or more records (either from the same database or - different databases), concerning the same device or user;
    • - -
    • deduce, with significant probability, information about a device - or user.
    • -
    - -

    Regardless of the de-identification approach, unique keys can be - used to correlate records within the de-identified dataset, provided - the keys do not exist and cannot be derived outside the de-identified - dataset and have no meaning outside the de-identified dataset (i.e. no - mapping table can exist that links the original identifiers to the keys - in the de-identified dataset).

    - -

    In the case of records in such data that relate to a single user or - a small number of users, usage and/or distribution restrictions are - advisable; experience has shown that such records can, in fact, - sometimes be used to identify the user or users despite technical - measures taken to prevent re-identification. It is also a good practice - to disclose (e.g. in the privacy policy) the process by which - de-identification of these records is done, as this can both raise the - level of confidence in the process, and allow for feedback on the - process. The restrictions might include, for example:

    - -
      -
    • technical safeguards that prohibit re-identification of - de-identified data;
    • - -
    • business processes that specifically prohibit re-identification - of de-identified data;
    • - -
    • business processes that prevent inadvertent release of - de-identified data;
    • - -
    • administrative controls that limit access to de-identified - data.
    • -
    - -

    Geolocation data (of a certain precision or over a period of time) - may itself identify otherwise de-identified data.

    -
    -
    - -
    -

    2.10 Tracking

    - -

    Tracking is the collection of data regarding a particular - user's activity across multiple distinct contexts and the retention, use, - or sharing of data derived from that activity outside the context in - which it occurred. A context is a set of resources that are - controlled by the same party or jointly controlled by a set of - parties.

    -
    - -
    -

    2.11 Collect, Use, Share

    - -

    A party collects data received in a network interaction if - that data remains within the party’s control after the network - interaction is complete.

    - -

    A party uses data if the party processes the data for any - purpose other than storage or merely forwarding it to another party.

    - -

    A party shares data if it transfers or provides a copy of - data to any other party.

    -
    -
    - -
    -

    3. Server Compliance

    - -
    -

    3.1 Indicating Compliance and Non-Compliance

    - -

    In order to indicate a party's compliance with a user's expressed - tracking preference as described in this specification for a given - resource, an origin server:

    - -
      -
    1. MUST conform to the origin server requirements of [TPE];
    2. - -
    3. MUST send a tracking status value other than ! (under - construction) or D (disregarding) for that resource; - and
    4. - -
    5. MUST send, in a tracking status representation applicable to that - resource, a compliance property that contains a reference to the - following URI: - -
      - http://www.w3.org/TR/2015/WD-tracking-compliance-20150714/ -
      -
    6. -
    - -

    When a user sends a DNT:0 signal, the user is expressing - a preference to allow tracking. This specification places no restrictions - on collection or use of data from network interactions with - DNT:0 signals. Note, however, that a party might be limited - by its own statements to the user regarding the DNT:0 - setting. For more information, see Section 4. Consent.

    - -

    A party to a given user action which receives a DNT:1 - signal and is tracking that action MUST indicate so to the user - agent. A party that is tracking a user with that user's consent to - override an expressed DNT:1 preference MUST indicate so with - the corresponding C or P - tracking status values. A party that is tracking a user for reasons - allowable under this specification (for example, for one of the permitted - uses described below) MUST use the T value. A party to a - given user action that is not engaged in tracking SHOULD use the - N value (a T value is also conformant but not - as informative).

    - -

    A party to a given user action that disregards a DNT:1 - signal MUST indicate that non-compliance to the user agent, using the - response mechanism defined in the [TPE] specification. The party MUST - provide information in its privacy policy listing the specific reasons - for not honoring the user's expressed preference. The party's - representation MUST be clear and easily discoverable.

    - -

    In the interest of transparency, especially where multiple reasons are - listed, a server might use the [TPE] qualifiers - or config - properties to indicate a particular reason for disregarding or steps to - address the issue. A user agent can parse this response to communicate - the reason to the user or direct the user to the relevant section of a - privacy policy. This document does not define specific qualifiers for - different reasons servers might have for disregarding signals.

    -
    - -
    -

    3.2 First Party Compliance

    - -

    With respect to a given user action, a first party to that action - which receives a DNT:1 signal MAY collect, retain and use - data received from those network interactions. This includes customizing - content, services and advertising with respect to those user actions.

    - - - -

    A first party to a given user action MUST NOT share data about those - network interactions with third parties to that action who are prohibited - from collecting data from those network interactions under this - specification. Data about the interaction MAY be shared with service - providers acting on behalf of that first party.

    - -

    Compliance rules in this section apply where a party determines that - it is a first party to a given user action — either because network - resources are intended only for use as a first party to a user action or - because the status is dynamically discerned. For cases where a party - later determines that data was unknowingly collected as a third party to - a user action, see Section 6. Unknowing Collection.

    - -

    A first party to a given user action MAY elect to follow the rules - defined under this specification for third parties.

    -
    - -
    -

    3.3 Third Party Compliance

    - -

    When a third party to a given user action receives a - DNT:1 signal in a related network interaction, that party - MAY collect and use data about those network interactions when:

    - -
      -
    1. a user has explicitly granted consent, as described below (Section - 4. Consent); -
    2. - -
    3. data is collected for the set of permitted uses described below - (Section 3.3.2 Permitted Uses); -
    4. - -
    5. or, the data is permanently de-identified as defined in this - specification. -
    6. -
    - -

    Other than under those enumerated conditions, that party:

    - -
      -
    1. MUST NOT collect data from this network interaction that would - result in data regarding this particular user being associated across - multiple contexts;
    2. - -
    3. MUST NOT retain, use, or share data from this particular user's - activity outside the context in which that activity occurred; and
    4. - -
    5. MUST NOT use data from network interactions with this particular - user in a different context. -
    6. -
    - - - -

    Outside the permitted uses and explicitly-granted exceptions listed - below, a third party to a given user action MUST NOT collect, share, or - associate with related network interactions any identifiers that identify - a specific user, user agent, or device. For example, a third party that - does not require unique user identifiers for one of the permitted uses - MUST NOT place a unique identifier in cookies or other browser-based - local storage mechanisms.

    - -
    -

    3.3.1 General Requirements for Permitted Uses

    - -

    Some collection and use of data by third parties to a given user - action is permitted, notwithstanding receipt of DNT:1 in a - network interaction, as enumerated below. Different permitted uses may - differ in their permitted items of data collection, retention times, - and consequences. In all cases, collection and use of data must be - reasonably necessary and proportionate to achieve the purpose for which - it is specifically permitted; unreasonable or disproportionate - collection, retention, or use are not “permitted uses”.

    - -
    Note

    The requirements in the following sub-sections apply to - a party that collects data for a permitted use and that would otherwise - be prohibited from collecting, retaining or using that data under the - third-party compliance requirements above. Where a first party to a - given user action, for example, collects some data for a purpose listed - among the permitted uses (e.g. security of network services), these - requirements do not apply.

    - -
    -
    3.3.1.1 No Secondary Uses
    - -

    A party MUST NOT use data collected for permitted uses for - purposes other than the permitted uses for which each datum was - permitted to be collected.

    -
    - -
    -
    3.3.1.2 Data Minimization, Retention and Transparency
    - -

    Data collected by a party for permitted uses MUST be minimized to - the data reasonably necessary for such permitted uses. Such data MUST NOT be retained any longer than is proportionate to, and reasonably - necessary for, such permitted uses. A party MUST NOT rely on unique - identifiers if alternative solutions are reasonably available.

    - -

    A party MUST publicly describe definite time periods for which - data collected for permitted uses are retained. The party MAY - enumerate different retention periods for different permitted uses. - Data MUST NOT be used for a permitted use once the data retention - period for that permitted use has expired. After there are no - remaining permitted uses for given data, the data MUST be deleted or - permanently de-identified.

    -
    - -
    -
    3.3.1.3 No Personalization
    - -

    A party that collects data for a permitted use MUST NOT use that - data to alter a specific user's online experience, except as - specifically permitted below.

    -
    - -
    -
    3.3.1.4 Reasonable Security
    - -

    A party that collects data for a permitted use MUST use reasonable - technical and organizational safeguards to prevent further processing - of data retained for permitted uses. While physical separation of - data maintained for permitted uses is not required, best practices - SHOULD be in place to ensure technical controls ensure access - limitations and information security.

    -
    -
    - -
    -

    3.3.2 Permitted Uses

    - -
    -
    3.3.2.1 Frequency Capping
    - -

    Regardless of the tracking preference expressed, data MAY be - collected, retained and used to limit the number of times that a user - sees a particular advertisement, often called frequency - capping, as long as the data retained do not reveal the user’s - browsing history.

    -
    - -
    -
    3.3.2.2 Financial Logging
    - -

    Regardless of the tracking preference expressed, data MAY be - collected and used for billing and auditing related to the - current network interaction and concurrent transactions. This may - include counting ad impressions to unique visitors, verifying - positioning and quality of ad impressions and auditing compliance - with this specification and other standards.

    -
    - -
    -
    3.3.2.3 Security
    - -

    Regardless of the tracking preference expressed, data MAY be - collected and used to the extent reasonably necessary to detect - security incidents, protect the service against malicious, - deceptive, fraudulent, or illegal activity, and prosecute those - responsible for such activity, provided that such data is not used - for operational behavior beyond what is reasonably necessary to - protect the service or institute a graduated response.

    - -

    When feasible, a graduated response to a detected security - incident is preferred over widespread data collection. In this - specification, a graduated response is a data minimization - methodology where actions taken are proportional to the problem or - risk being mitigated.

    - - -
    - -
    -
    3.3.2.4 Debugging
    - -

    Regardless of the tracking preference expressed, data MAY be - collected, retained and used for debugging purposes to - identify and repair errors that impair existing intended - functionality.

    -
    -
    - -
    -

    3.3.3 Qualifiers for Permitted Uses

    - -

    A party MAY indicate which of the listed permitted uses apply to - tracking of a user with the qualifiers - mechanism defined in the [TPE] document. While providing qualifiers - is OPTIONAL, a party that wishes to indicate particular permitted uses - MUST use the corresponding characters as indicated in the table - below.

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    qualifierpermitted use
    c - frequency capping -
    f - financial logging -
    s - security -
    d - debugging -
    - -

    A party MAY use multiple qualifiers to indicate that multiple - permitted uses of tracking might be ongoing and that each such use - conforms to any corresponding requirements. Where qualifiers are - present, a party MUST indicate all claimed permitted uses.

    - - -
    +
    + [CONTENT]
    -
    - -
    -

    4. Consent

    - -

    A party MAY engage in practices otherwise proscribed by this - specification when the user has given explicit and informed consent. After - consent is received, it might be subsequently registered through the - User-Granted Exceptions API defined in the companion [TPE] document or - recorded out of band using a different technology. A party MUST - indicate when it is relying on out of band consent to override a Do - Not Track preference, as described in the companion [TPE] document.

    - - - -
    -

    4.1 Transfer of consent to another party

    - -

    When a party requests consent from the user as described above, it - might include consent for sharing data with its service providers. This transitive - permission might apply even to those parties to which the user has - not separately granted consent to be tracked.

    - -

    A party that transfers consent in this way MUST ensure that its - service providers acknowledge this - consent by use of the corresponding - tracking status value of C and a qualifier - of t ("transferred").

    - - -
    -
    - -
    -

    5. Interaction with Existing User Privacy Controls

    - -

    Multiple systems may be setting, sending, and receiving DNT and/or - opt-out signals at the same time. As a result, it will be important to - ensure industry and web browser vendors are on the same page with respect - to honoring user choices in circumstances where "mixed signals" may be - received.

    - -

    As a general principle, more specific settings override less specific - settings, as where the specific consent in user-granted exceptions - overrides a general preference. If a party perceives a conflict between - settings, a party MAY seek clarification from the user or MAY honor the - more restrictive setting.

    -
    - -
    -

    6. Unknowing Collection

    - -

    If a party learns that it possesses data in violation of this - specification, it MUST, where reasonably feasible, delete or de-identify - that data at the earliest practical opportunity, even if it was previously - unaware of such information practices despite reasonable efforts to - understand its information practices.

    -
    - - - -
    -

    A. Acknowledgements

    - -

    This specification consists of input from many discussions within and - around the W3C Tracking Protection Working Group, along with written - contributions from: Haakon Flage Bratsberg (Opera Software), Amy Colando - (Microsoft), Rob van Eijk (Invited Expert), Roy T. Fielding (Adobe), Vinay - Goel (Adobe), Yianni Lagos (Future of Privacy Forum), Tom Lowenthal - (Mozilla), Ted Leung (The Walt Disney Company), Jonathan Mayer (Stanford), - Ninja Marnau (Invited Expert), Mike O'Neill (Baycloud Systems), Thomas - Roessler (W3C), Wendy Seltzer (W3C), Rob Sherman (Facebook), John M. - Simpson (Invited Expert), David Singer (Apple), Kevin G. Smith (Adobe), - Vincent Toubiana (Invited Expert), Rigo Wenning (W3C), and Shane Wiley - (Yahoo!). The co-chairs of the group have helped guide those discussions: - Justin Brookman (CDT), Carl Cargill (Adobe), - Aleecia M. McDonald (Stanford), Matthias Schunter (Intel), and Peter Swire - (Invited Expert).

    - -

    Many thanks to Robin Berjon for ReSpec.

    -
    - -

    B. References

    B.1 Normative references

    [RFC7230]
    R. Fielding, Ed.; J. Reschke, Ed.. Hypertext Transfer Protocol (HTTP/1.1): Message Syntax and Routing. June 2014. Proposed Standard. URL: https://tools.ietf.org/html/rfc7230 -
    [TPE]
    Roy T. Fielding; David Singer. Tracking Preference Expression (DNT). 24 April 2014. W3C Last Call Working Draft. URL: http://www.w3.org/TR/tracking-dnt/ -
    \ No newline at end of file + \ No newline at end of file diff --git a/test/docs/metadata/ttml-imsc1.html b/test/docs/metadata/ttml-imsc1.html index b50571311..27f729a8a 100644 --- a/test/docs/metadata/ttml-imsc1.html +++ b/test/docs/metadata/ttml-imsc1.html @@ -454,3622 +454,8 @@

    Table of Contents

    - - -
    -

    1. Scope

    - -

    This document specifies two profiles of [TTML1]: a text-only profile and an image-only profile. These profiles are - intended for subtitle and caption delivery worldwide, including dialog language translation, content description, captions for - deaf and hard of hearing, etc.

    - -

    The text profile is a syntactic superset of [ttml10-sdp-us], and a document can simultaneously conform to both [ttml10-sdp-us] and the text-only profile.

    - -

    The document defines extensions to [TTML1], as well as incorporates extensions specified in [ST2052-1] and - [EBU-TT-D].

    -
    - -
    -

    2. Documentation Conventions

    - -

    This specification uses the same conventions as [TTML1] for the specification of parameter attributes, styling attributes and metadata elements. In particular, Section 2.3 of [TTML1] specifies conventions used in the XML representation of elements.

    - -

    All content of this specification that is not explicitly marked as non-normative is considered to be normative. If a section or appendix header contains the expression "non-normative", then the entirety of the section or appendix is considered non-normative.

    - -

    This specification uses Feature and Extension - designations as defined in Appendices D.1 and E.1 at [TTML1]:

    -
      -
    • when making reference to content conformance, - these designations refer to the syntactic expression or the semantic - capability associated with each designated Feature or - Extension; and
    • -
    • when making reference to processor - conformance, these designations refer to processing - requirements associated with each designated Feature or - Extension.
    • -
    - -

    If the name of an element referenced in this specification is not namespace qualified, then the TT namespace applies (see 6.3 Namespaces).

    - -
    - -
    -

    3. Terms and Definitions

    - -

    Default Region. See Section 9.3.1 at [TTML1].

    - -

    Document Instance. See Section 2.2 at [TTML1].

    - -

    Extension. See Section 2.2 at [TTML1].

    - -

    Feature. See Section 2.2 at [TTML1].

    - -

    Intermediate Synchronic Document. See Section 9.3.2 at [TTML1].

    - -

    Document Interchange Context. See Section 2.2 at [TTML1].

    - -

    Document Processing Context. See Section 2.2 at [TTML1].

    - -

    Processor. Either a Presentation processor or a Transformation processor.

    - -

    Presentation processor. See Section 2.2 at [TTML1].

    - -

    Transformation processor. See Section 2.2 at [TTML1].

    - -

    Related Media Object. See Section 2.2 at [TTML1].

    - -

    Related Video Object. A Related Media Object that consists of a sequence of image frames, each a rectangular array of pixels.

    - -

    Text Alternative. As defined in [WCAG20].

    - -
    - -

    4. Conformance

    -

    - As well as sections marked as non-normative, all authoring guidelines, diagrams, examples, - and notes in this specification are non-normative. Everything else in this specification is - normative. -

    -

    The key words MAY, SHALL, SHALL NOT, SHOULD, and SHOULD NOT are - to be interpreted as described in [RFC2119]. -

    - - -

    A Document Instance that conforms to a profile defined herein:

    - -
      -
    • SHALL satisfy all normative provisions specified by the profile;
    • - -
    • MAY include any vocabulary, syntax or attribute value associated with a Feature or - Extension whose disposition is permitted in the profile;
    • - -
    • SHALL NOT include any vocabulary, syntax or attribute value associated with a Feature or Extension - whose disposition is prohibited in the profile.
    • - -
    - -
    Note

    A Document Instance, by definition, satisfies the requirements of Section 3.1 at [TTML1], - and hence a Document Instance that conforms to a profile defined herein is also a conforming TTML1 Document Instance.

    - -

    A presentation processor that conforms to a profile defined in this specification:

    - -
      -
    • SHALL satisfy the Generic Processor Conformance requirements at Section 3.2.1 of [TTML1];
    • - -
    • SHALL satisfy all normative provisions specified by the profile; and
    • - -
    • SHALL implement presentation semantic support for every Feature and Extension designated as permitted by the profile, subject to - any additional constraints on each Feature and Extension as specified by the profile.
    • -
    - -

    A transformation processor that conforms to a profile defined in this specification:

    - -
      -
    • SHALL satisfy the Generic Processor Conformance requirements at Section 3.2.1 of [TTML1];
    • - -
    • SHALL satisfy all normative provisions specified by the profile; and
    • - -
    • SHALL implement transformation semantic support for every Feature and Extension designated as permitted by the profile, subject to - any additional constraints on each Feature and Extension as specified by the profile.
    • -
    - -
    Note

    The use of the term presentation processor (transformation processor) within this specification - does not imply conformance to the DFXP Presentation Profile (DFXP Transformation Profile) specified in [TTML1]. In other - words, it is not considered an error for a presentation processor (transformation processor) to conform to a - profile defined in this specification without also conforming to the DFXP Presentation Profile (DFXP Transformation - Profile).

    - -
    Note

    This specification does not specify presentation processor or transformation processor behavior when processing or transforming a non-conformant Document Instance.

    - -
    Note

    The permitted and prohibited - dispositions do not refer to the specification of a - ttp:feature or ttp:extension element as being - permitted or prohibited within a ttp:profile element.

    - -
    - -
    -

    5. Profiles

    - -
    -

    5.1 General

    - -

    Notwithstanding special cases, e.g. a Document Instance that contains no p, span, br element and no smpte:backgroundImage attribute, it is generally not possible to construct a Document Instance that conforms to the Text Profile and Image Profile simultaneously, and it is not possible to construct a Document Instance that results in the presentation of both text data and image data.

    - -

    In applications that require subtitle/caption content in image form to be simultaneously available in text form, two - distinct Document Instances, one conforming to the Text Profile and the other conforming to the Image Profile, - SHOULD be offered. In addition, the Text Profile Document Instance SHOULD be associated with the Image Profile - Document Instance such that, when image content is encountered, assistive technologies have access to its corresponding text - form. The method by which this association is made is left to each application.

    - -
    Note

    The ittm:altText element specified in 6.7.4 ittm:altText also allows a text equivalent - string to be associated with an image, e.g. to support indexation of the content and also facilitate quality checking of the - document during authoring.

    - -

    Annex D. WCAG Considerations specifically discusses this specification in the context of the [WCAG20] - guidelines.

    -
    - -
    -

    5.2 Text Profile

    - -

    The Text Profile consists of Sections 6. Common Constraints and 7. Text Profile Constraints.

    -
    - -
    -

    5.3 Image Profile

    - -

    The Image Profile consists of Sections 6. Common Constraints and 8. Image Profile Constraints.

    -
    - - -
    -

    5.4 Profile Resolution Semantics

    - - -

    For the purpose of content processing, the determination of the resolved - profile SHOULD take into account both the signaled profile, as defined - in 6.9 Profile Signaling, and profile metadata, as designated by either (or both) - the Document Interchange Context or (and) the Document Processing - Context, which MAY entail inspecting document content.

    - -

    If the resolved profile is not a profile supported by the Processor - but is feasibly interoperable with the Text Profile, then the resolved - profile is the Text Profile; otherwise, if the resolved profile is not - a profile supported by the Processor but is feasibly interoperable with - the Image Profile, then the resolved profile is the Image Profile.

    - - -

    If the resolved profile is a profile supported by the Processor, then - the Processor SHOULD process the Document Instance according to the - resolved profile. If the resolved profile is neither Text Profile nor - Image Profile, processing is outside the scope of this specification.

    - - -

    If the resolved profile is undetermined or not supported by the - Processor, then the Processor SHOULD nevertheless process the Document - Instance using one of its supported profiles, with a preference for the - Text Profile over the Image Profile; otherwise, processing MAY be - aborted.

    - - -
    -
    - -
    -

    6. Common Constraints

    - -
    -

    6.1 Document Encoding

    - -

    A Document Instance SHALL use UTF-8 character encoding as specified in [UNICODE].

    -
    - -
    -

    6.2 Foreign Element and Attributes

    - -

    A Document Instance MAY contain elements and attributes that are neither specifically permitted nor forbidden by a - profile.

    - -

    A transformation processor SHOULD preserve such elements or attributes whenever possible.

    - -
    Note

    Document Instances remain subject to the structural requirements of [TTML1].

    -
    - -
    -

    6.3 Namespaces

    - -

    The following namespaces (see [xml-names]) are used in this specification:

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    NamePrefixValueDefining Specification
    XMLxmlhttp://www.w3.org/XML/1998/namespace[xml-names]
    TTtthttp://www.w3.org/ns/ttml[TTML1]
    TT Parameterttphttp://www.w3.org/ns/ttml#parameter[TTML1]
    TT Stylingttshttp://www.w3.org/ns/ttml#styling[TTML1]
    TT Featurenonehttp://www.w3.org/ns/ttml/feature/[TTML1]
    SMPTE-TT Extensionsmptehttp://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt[ST2052-1]
    EBU-TT Stylingebuttsurn:ebu:tt:style[EBU-TT-D]
    EBU-TT Metadataebuttmurn:ebu:tt:metadata[EBU-TT-D]
    IMSC 1.0 Stylingittshttp://www.w3.org/ns/ttml/profile/imsc1#stylingThis specification
    IMSC 1.0 Parameterittphttp://www.w3.org/ns/ttml/profile/imsc1#parameterThis specification
    IMSC 1.0 Metadataittmhttp://www.w3.org/ns/ttml/profile/imsc1#metadataThis specification
    IMSC 1.0 Extensionnonehttp://www.w3.org/ns/ttml/profile/imsc1/extension/This specification
    IMSC 1.0 Text Profile Designatornonehttp://www.w3.org/ns/ttml/profile/imsc1/textThis specification
    IMSC 1.0 Image Profile Designatornonehttp://www.w3.org/ns/ttml/profile/imsc1/imageThis specification
    - -

    The namespace prefix values defined above are for convenience and Document Instances MAY use any prefix value that - conforms to [xml-names].

    - -

    The namespaces defined by this specification are mutable [namespaceState]; all undefined names in these namespaces are reserved for future standardization by the W3C.

    -
    - -
    -

    6.4 Overflow

    - -

    A Document Instance SHOULD be authored assuming strict clipping of content that falls out of region areas, regardless of - the computed value of tts:overflow for the region.

    - -
    Note

    As specified in [TTML1], tts:overflow has no effect on the extent of the region, and hence - the total normalized drawing area S(En) at 9.3 Paint Regions.

    -
    - - - -
    -

    6.6 Synchronization

    - -

    Each intermediate synchronic document of the Document Instance is intended to be displayed on a specific frame and - removed on a specific frame of the Related Video Object.

    - -

    When mapping a media time expression M to a frame F of a Related Video Object, e.g. for the purpose of rendering a - Document Instance onto the Related Video Object, the presentation processor SHALL map M to the frame F with the - presentation time that is the closest to, but not less than, M.

    - - - -
    Note

    In a typical scenario, the same video program (the Related Video Object) will be used for Document Instance - authoring, delivery and user playback. The mapping from media time expression to Related Video Object above allows the author - to precisely associate subtitle video content with video frames, e.g. around scene transitions. In circumstances where the - video program is downsampled during delivery, the application can specify that, at playback, the Related Video Object be - considered the delivered video program upsampled to its original rate, thereby allowing subtitle content to be rendered at the - same temporal locations it was authored.

    - -
    - -
    -

    6.7 Extensions

    - -
    -

    6.7.1 ittp:aspectRatio

    - -

    The ittp:aspectRatio attribute allows authorial control of the mapping of the root container of a Document Instance to each image frame of the Related Video Object.

    - -

    If present, the ittp:aspectRatio attribute SHALL conform to the following syntax:

    - - - - - - - -
    -
    -
    ittp:aspectRatio
    -  : numerator denominator          // with int(numerator) != 0 and int(denominator) != 0
    -                                   // where int(s) parses string s as a decimal integer.
    -
    -numerator | denominator
    -  : <digit>+
    -
    -
    -
    - -

    The root container of a Document Instance SHALL be mapped to each image frame of the Related Video Object - according to the - following:

    - -
      -
    1. -

      If ittp:aspectRatio is present, the root container SHALL be mapped to a rectangular area within the - image frame such that:

      - -
        -
      1. the ratio of the width to the height of the rectangular area is equal to ittp:aspectRatio,
      2. - -
      3. the center of the rectangular area is collocated with the center of the image frame,
      4. - -
      5. the rectangular area is entirely within the image frame, and
      6. - -
      7. the rectangular area has a height or width equal to that of the image frame.
      8. -
      -
    2. - -
    3. -

      Otherwise, the root container of a Document Instance SHALL be mapped to the image frame in its - entirety.

      -
    4. -
    - -

    An ittp:aspectRatio attribute is considered to be significant only when specified on the tt - element.

    - - - -
    Note

    -The ittp:aspectRatio parameter effectively defines the intended display aspect ratio (DAR) of the root container, while - the tts:extent style property on the root element effectively defines the intended storage aspect ratio (SAR) of the root container. -

    - -
    Note
    The mapping algorithm above allows the author to - precisely control caption/subtitle position relative to elements within each frame of the video program, e.g. - to match the position of actors. This mapping algorithm does not however specify the presentation of - either the video frame or root container on the ultimate display device. This presentation depends on many factors, including - user input, and can involve displaying only parts of the content. Authors are therefore encouraged to follow best practices - for the intended target applications. Below are selected examples: -
      -
    • A 16:9 video program is authored to ensure adequate presentation on 4:3 display devices using a center-cut. - Accordingly subtitle/captions are authored using ttp:aspectRatio="4 3", allowing the combination - to be displayed on both 4:3 and 16:9 display devices while preserving both caption/subtitles content and the relative position - of caption/subtitles with video elements. -
    • -
    • - A playback system zooms the content of example (a) to fill a 21:9 display, perhaps as instructed by the user. The system elects to scale - the root container to fit vertically within the display (maintaining its aspect ratio as authored), at the cost of losing relative - positioning between caption/subtitles and video elements. -
    • -
    • - The system described in (b) instead elects to map the root container to the video frame, maintaining relative positioning - between caption/subtitles and video elements but at the risk of clipping subtitles/captions. -
    • -
    -
    - -
    -
    -

    6.7.2 ittp:progressivelyDecodable

    - -

    A progressively decodable Document Instance is structured to facilitate presentation before the document is - received in its entirety, and can be identified using the ittp:progressivelyDecodable attribute.

    - -

    A progressively decodable Document Instance is a Document Instance that conforms to the following:

    - -
      -
    1. no attribute or element of the TTML timing vocabulary is present within the head element;
    2. - -
    3. given two intermediate synchronic documents A and B - of the Document Instance, with start times TA and TB, respectively, TA is not greater than - TB if A includes a p element that lexically precedes any p - element that B includes; -
    4. - -
    5. no attribute of the TTML timing vocabulary is present on a descendant element of p; and
    6. - -
    7. no element E1 explicitly references another element E2 where the opening tag of E2 is lexically subsequent to the opening tag of E1.
    8. -
    - -

    If present, the ittp:progressivelyDecodable attribute SHALL conform to the following syntax:

    - - - - - - - -
    -
    -
    ittp:progressivelyDecodable
    -  : "true"
    -  | "false"
    -
    -
    -
    - -

    An ittp:progressivelyDecodable attribute is considered to be significant only when specified on the - tt element.

    - -

    If not specified, the value of ittp:progressivelyDecodable SHALL be considered to be equal to "false".

    - -

    A Document Instance for which the computed value of ittp:progressivelyDecodable is "true" SHALL be a - progressively decodable Document Instance.

    - -

    A Document Instance for which the computed value of ittp:progressivelyDecodable is "false" is neither - asserted to be a progressively decodable Document Instance nor asserted not to be a progressively decodable Document Instance.

    - -
    Example 3
    <tt
    -  xmlns="http://www.w3.org/ns/ttml"
    -  xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
    -  xmlns:tts="http://www.w3.org/ns/ttml#styling"
    -  xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
    -  xmlns:ittp="http://www.w3.org/ns/ttml/profile/imsc1#parameter"
    -  ittp:progressivelyDecodable="true"
    -  ttp:profile="..."
    - >
    - ...
    -</tt>
    - -
    Note
    -

    [TTML1] specifies explicit referencing of elements identified using xml:id in the following - circumstances:

    - -
      -
    • an element in body referencing region elements. In this case, Requirement 4 above is - always satisfied.
    • - -
    • an element in body referencing style elements. In this case, Requirement 4 above is - always satisfied.
    • - -
    • a region element referencing style elements. In this case, Requirement 4 above is always - satisfied.
    • - -
    • a style element referencing other style elements. In this case, Requirement 4 provides an - optimization of style element ordering within the head element.
    • - -
    • a ttm:actor element referencing a ttm:agent element. In this case, Requirement 4 provides - optimization of metadata elements ordering within the document.
    • - -
    • a content element referencing ttm:agent elements using the ttm:agent attribute. In this - case, Requirement 4 provides optimization of metadata elements ordering within the document.
    • -
    -
    -
    - -
    -

    6.7.3 itts:forcedDisplay

    - -

    itts:forcedDisplay can be used to hide content whose computed value of tts:visibility is "visible" when the processor has been configured to do so via the application parameter displayForcedOnlyMode.

    - -

    If and only if the value of displayForcedOnlyMode is "true", a content element with an itts:forcedDisplay - computed value of "false" SHALL NOT produce any visible rendering, but still affect layout, regardless of the computed - value of tts:visibility.

    - -

    The itts:forcedDisplay attribute SHALL conform to the following:

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Values:false | true
    Initial:false
    Applies to:body, div, p, region, span
    Inherited:yes
    Percentages:N/A
    Animatable:discrete
    - -

    Annex C. Forced content (non-normative) illustrates the use of itts:forcedDisplay in an application in which a - single document contains both hard of hearing captions and translated foreign language subtitles, using - itts:forcedDisplay to display translation subtitles always, independently of whether the hard of hearing - captions are displayed or hidden.

    - -

    The presentation processor SHALL accept an optional boolean parameter called displayForcedOnlyMode, - whose value MAY be set by a context external to the presentation processor. If not set, the value of - displayForcedOnlyMode SHALL be assumed to be equal to "false".

    - -

    The algorithm for setting the displayForcedOnlyMode parameter based on the circumstances under which the - Document Instance is presented is left to the application.

    - -
    Example 4
    ...
    -<head>
    -	...
    -	<region xml:id="r1" tts:origin="10% 2%" tts:extent="80% 10%" tts:color="white" itts:forcedDisplay="true" tts:backgroundColor="black"/>
    -	<region xml:id="r2" tts:origin="10% 80%" tts:extent="80% 88%" tts:color="white" tts:backgroundColor="black"/>
    -	...
    -</head>
    -...
    -<div>
    -	 <p region="r1" begin="1s" end="6s">Lycée</p>
    -
    -	 <!-- the following will not appear if displayForcedOnlyMode='true' -->
    -	 <p region="r2" begin="4s" end="6s">Nous étions inscrits au même lycée.</p>
    -</div>
    -...
    - - -
    Note

    As specified in [TTML1], the background of a region can be visible even if the computed value of tts:visibility equals - "hidden" for all active content within. The background of a region for which itts:forcedDisplay equals "true" can therefore remain visible even if itts:forcedDisplay equals "false" for all active - content elements within the region and displayForcedOnlyMode equals "true". Authors can avoid this situation, for instance, by ensuring that content - elements and the regions that they are flowed into always have the same value of itts:forcedDisplay.

    - -
    Note

    Although itts:forcedDisplay, like all the TTML style attributes, has no defined semantics on a - br content element, itts:forcedDisplay will apply to a br content element if it is - either defined on an ancestor content element of the br content element or it is applied to a region element - corresponding to a region that the br content element is being flowed into.

    - -
    Note

    It is expected that the functionality of itts:forcedDisplay will be mapped to a conditional - style construct in a future revision of this specification.

    - -
    Note

    The presentation semantics associated with itts:forcedDisplay are intended to be - compatible with those associated with the forcedDisplayMode attribute defined in [CFF].

    -
    - -
    -

    6.7.4 ittm:altText

    - -

    ittm:altText allows an author to provide a text string equivalent for an element, typically an image. This - text equivalent MAY be used to support indexing of the content and also facilitate quality checking of the document during - authoring.

    - -

    The ittm:altText element SHALL conform to the following syntax:

    - - - - - - - -
    -
    -
    <ittm:altText
    -  xml:id = ID
    -  xml:lang = string
    -  xml:space = (default|preserve)
    -  {any attribute not in the default namespace, any TT namespace or any IMSC 1.0 namespace}>
    -  Content: #PCDATA
    -</ittm:altText>
    -
    -
    -
    - -

    The ittm:altText element SHALL be a child of the metadata element.

    - -

    8. Image Profile Constraints specifies the use of the ittm:altText element with images.

    - -
    Example 5
    ...
    -<div region="r1" begin="1s" end="6s" smpte:backgroundImage="1.png">
    -  <metadata>
    -  <ittm:altText>Nous étions inscrits au même lycée.</ittm:altText>
    -  </metadata>
    -</div>
    -...
    - -
    Note

    In contrast to the common use of alt attributes in [HTML5], the ittm:altText - element content is not intended to be displayed in place of the element if the element is not loaded. The - ittm:altText element content can however be read and used by assistive technologies.

    -
    - - - -
    - -
    -

    6.8 Region

    - -
    -

    6.8.1 Presented Region

    - -

    A presented region is a temporally active region that satisfies the following conditions:

    - -
      -
    1. the computed value of tts:opacity is not equal to "0.0"; and
    2. - -
    3. the computed value of tts:display is not "none"; and
    4. - -
    5. the computed value of tts:visibility is not "hidden"; and
    6. - -
    7. either (a) content is selected into the region or (b) the computed value of tts:showBackground is equal - to "always" and the computed value of tts:backgroundColor has non-transparent alpha.
    8. -
    -
    - -
    -

    6.8.2 Dimensions and Position

    - -

    All regions SHALL NOT extend beyond the root container, i.e. the intersection of the sets of coordinates belonging to a - region and the sets of coordinates belonging to the root container is the - set of coordinates belonging to the region.

    - -

    No two presented regions in a given intermediate synchronic document SHALL - overlap, i.e. the intersection of the sets of coordinates within each presented region is empty.

    -
    - -
    -

    6.8.3 Maximum number

    - -

    The number of presented regions in a given intermediate synchronic document SHALL NOT be greater than 4.

    -
    -
    - -
    -

    6.9 Profile Signaling

    - -

    - The ttp:profile attribute SHOULD be present on the tt element and equal to the designator of the IMSC1 profile to which the Document Instance conforms, and the ttp:profile element SHOULD NOT be present, unless:

    -
      -
    • - the Document Instance also conforms to [EBU-TT-D], in which case the ttp:profile attribute - and the ttp:profile element SHOULD NOT be present, and instead the designator of the IMSC1 profile to which the Document Instance conforms and - the URI "urn:ebu:tt:distribution:2014-01" SHOULD each be carried in an ebuttm:conformsToStandard element as specified in - [EBU-TT-D]; or -
    • -
    • - the Document Instance also conforms to [ttml10-sdp-us], in which case the ttp:profile attribute SHOULD NOT be present. [ttml10-sdp-us] requires that the ttp:profile element be present and that its use attribute be set to a specified value. -
    • -
    - - -

    - The ttp:profile and ebuttm:conformsToStandard elements SHALL NOT signal conformance to both Image Profile and Text Profile in a given Document Instance. -

    - -
    - -
    -

    6.10 Hypothetical Render Model

    - -

    It SHALL be possible to apply the Hypothetical Render Model specified in Section 9. Hypothetical Render Model to any sequence of consecutive intermediate synchronic documents without error as defined in Section 9.2 General.

    -
    - -
    -

    6.11 Features and Extensions

    - -

    See 4. Conformance for a definition of permitted and prohibited.

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    FeatureDispositionAdditional provision
    Relative to the TT Feature namespace
    #animationpermitted
    #backgroundColor-blockpermitted
    #backgroundColor-regionpermitted
    #cellResolutionpermittedIf the Document Instance includes any length value that uses the c expression, - ttp:cellResolution SHOULD be present on the tt element.
    #clockModeprohibited
    #clockMode-gpsprohibited
    #clockMode-localprohibited
    #clockMode-utcprohibited
    #corepermitted
    #display-blockpermitted
    #display-inlinepermitted
    #display-regionpermitted
    #displaypermitted
    #dropModeprohibited
    #dropMode-dropNTSCprohibited
    #dropMode-dropPALprohibited
    #dropMode-nonDropprohibited
    #extent-rootpermittedIf the Document Instance includes any length value that uses the px expression, - tts:extent SHALL be present on the tt element.
    #extentpermitted
    #frameRatepermittedIf the Document Instance includes any clock time expression that uses the frames term or any offset -time expression that uses the f metric, the ttp:frameRate attribute SHALL -be present on the tt element.
    #frameRateMultiplierpermitted
    #layoutpermitted
    #length-cellpermittedc units SHALL NOT be present outside of the value of ebutts:linePadding.
    #length-integerpermitted
    #length-negativeprohibited
    #length-percentagepermitted
    #length-pixelpermitted
    #length-positivepermitted
    #length-realpermitted
    #lengthpermitted
    #markerModeprohibited
    #markerMode-continuousprohibited
    #markerMode-discontinuousprohibited
    #metadatapermitted
    #opacitypermitted
    #originpermitted
    #overflowpermitted
    #overflow-visiblepermitted
    #pixelAspectRatioprohibited
    #presentationpermittedSee constraints applied to #profile.
    #profilepermitted - See 6.9 Profile Signaling. - -
    #showBackgroundpermitted
    #structurepermitted
    #styling-chainedpermitted
    #styling-inheritance-contentpermitted
    #styling-inheritance-regionpermitted
    #styling-inlinepermitted
    #styling-nestedpermitted
    #styling-referentialpermitted
    #stylingpermitted
    #subFrameRateprohibited
    #tickRatepermittedttp:tickRate SHALL be present on the tt element if the - document contains any time expression that uses the t metric.
    #timeBase-clockprohibited
    #timeBase-mediapermitted

    NOTE: [TTML1] specifies that the default timebase is "media" if - ttp:timeBase is not specified on tt.

    #timeBase-smpteprohibited
    #time-clock-with-framespermitted
    #time-clockpermitted
    #time-offset-with-framespermitted
    #time-offset-with-tickspermitted
    #time-offsetpermitted
    #timeContainerpermitted
    #timingpermitted -
    • All time expressions within a Document Instance SHOULD use the same syntax, either - clock-time or offset-time.
    • - -
    • For any content element that contains br elements or text nodes or a - smpte:backgroundImage attribute, the begin and end attributes SHOULD - be specified on the content element or at least one of its ancestors.
    -
    #transformationpermittedSee constraints at #profile.
    #visibility-blockpermitted
    #visibility-regionpermitted
    #writingMode-horizontal-lrpermitted
    #writingMode-horizontal-rlpermitted
    #writingMode-horizontalpermitted
    #zIndexpermitted
    ExtensionDispositionProvisions
    Relative to the IMSC 1.0 Extension namespace
    #aspectRatiopermitted
    #forcedDisplaypermitted
    #progressivelyDecodablepermitted
    #altTextpermitted
    - -
    Note

    As specified in [TTML1], a #time-offset-with-frames expression is translated to a media time M according to $M = 3600 \cdot \text{hours} + 60 \cdot \text{minutes} + \text{seconds} + \text{frames} \div (\text{ttp:frameRateMultiplier} \cdot \text{ttp:frameRate})$.

    -
    -
    - -
    -

    7. Text Profile Constraints

    - -
    -

    7.1 Profile Designator

    - -

    This profile is associated with the following profile designator:

    - - - - - - - - - - - - - - - - - -
    Profile NameProfile Designator
    IMSC 1.0 Texthttp://www.w3.org/ns/ttml/profile/imsc1/text
    - -
    Note

    As specified in 6.11 Features and Extensions, the presence of the ttp:profile attribute is - not required by this profile. The profile designator specified above is intended to be generally used to signal conformance - of a Document Instance to the profile. The details of such signaling depend on the application, and can, for instance, use - metadata structures out-of-band of the Document Instance.

    -
    - - - -
    -

    7.3 Reference Fonts

    - -

    The flow of text within a region depends on the dimensions and spacing (kerning) between individual glyphs. - The following allows, for instance, region extents to be set such that text flows without clipping.

    - -

    When rendering codepoints matching one of the combinations of computed font family and codepoints listed in - A. Reference Fonts, a processor SHALL use a font that generates a glyph sequence whose dimension is substantially - identical to the glyph sequence that would have been generated by one of the specified reference fonts.

    - -
    Note

    Implementations can use fonts other than those specified in A. Reference Fonts. Two fonts - with equal metrics can have a different appearance, but flow identically.

    - -
    - -
    -

    7.4 Features and Extensions

    - -

    See 4. Conformance for a definition of permitted and prohibited.

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    FeatureDispositionAdditional provisions
    Relative to the TT Feature namespace
    #backgroundColor-inlinepermitted
    #backgroundColorpermitted
    #bidipermitted
    #contentpermitted
    #colorpermitted

    The initial value of tts:color SHALL be "white".

    -

    NOTE: This is consistent with [ST2052-1].

    #directionpermitted
    #displayAlignpermitted
    #extent-regionpermittedThe tts:extent attribute SHALL be present on all region elements, where it - SHALL use either px units or "percentage" syntax.
    -
    #fontFamily-genericpermitted

    In absence of specific instructions on the choice of font families, and in - order to enhance reproducibility of line fitting, authors are encouraged to - use the monospaceSerif or proportionalSansSerif generic font families, - for which reference font metrics are defined at A. Reference Fonts.

    - -

    If the computed value of tts:fontFamily is "default", then the used value of tts:fontFamily SHALL be "monospaceSerif".

    - -

    NOTE: The term used value is defined in CSS 2.1, as normatively referenced by [TTML1].

    -
    #fontFamily-non-genericpermitted
    #fontFamilypermitted
    #fontSize-anamorphicprohibited
    #fontSize-isomorphicpermitted
    #fontSizeSee individual disposition of #fontSize-anamorphic and #fontSize-isomorphic.
    #fontStyle-italicpermitted
    #fontStyle-obliquepermitted
    #fontStylepermitted
    #fontWeight-boldpermitted
    #fontWeightpermitted
    #length-empermitted
    #lineBreak-uax14The processor SHALL implement the #lineBreak-uax14 feature defined in the TT Feature namespace.
    #lineHeightpermittedAs implementation of the "normal" value is not uniform at the time of this writing, tts:lineHeight SHOULD NOT be set to "normal" and SHOULD be explicitly specified such that the specified style set of each p element contains a tts:lineHeight property whose value is not assigned by initial value fallback.
    #nested-divpermitted
    #nested-spanpermitted
    #originpermittedThe tts:origin attribute SHALL use px units or "percentage" representation, and SHALL NOT - use em units.
    #padding-1permitted
    #padding-2permitted
    #padding-3permitted
    #padding-4permitted
    #paddingpermitted
    #textAlign-absolutepermitted
    #textAlign-relativepermitted
    #textAlignpermitted
    #textDecoration-overpermitted
    #textDecoration-throughpermitted
    #textDecoration-underpermitted
    #textDecorationpermitted
    #textOutline-blurredprohibited
    #textOutline-unblurredpermitted
    #textOutlinepermittedThe computed value of tts:textOutline on a span element - SHALL be 10% or less than the computed value of tts:fontSize on the same element.
    #unicodeBidipermitted
    #visibilitypermitted
    #visibility-inlinepermitted
    #wrapOptionpermitted
    #writingModepermitted
    #writingMode-verticalpermitted
    ExtensionDispositionProvisions
    Relative to the SMPTE-TT Extension Namespace
    #imageprohibited
    Relative to the IMSC 1.0 Extension namespace
    #linePaddingpermitted - -

    If used, the attribute ebutts:linePadding MAY be specified on elements region, body, - div and p in addition to style.

    - -

    The processor:

    -
      -
    • SHALL apply ebutts:linePadding to p only; and
    • -
    • SHALL treat ebutts:linePadding as inheritable.
    • -
    - -

    NOTE: The ebutts:linePadding attribute only supports c length units.

    - -
    #multiRowAlignpermitted -

    If used, the attribute ebutts:multiRowAlign MAY be specified on elements region, body, - div and p in addition to style.

    - -

    The processor:

    -
      -
    • SHALL apply ebutts:multiRowAlign to p only; and
    • -
    • SHALL treat ebutts:multiRowAlign as inheritable.
    • -
    -
    - -
    Note

    In contrast to this specification, [EBU-TT-D] specifies that the attributes ebutts:linePadding and ebutts:multiRowAlign are allowed only on the style element.

    - -
    -
    - -
    -

    8. Image Profile Constraints

    - -
    -

    8.1 Profile Designator

    - -

    This profile is associated with the following profile designator:

    - - - - - - - - - - - - - - - -
    Profile NameProfile Designator
    IMSC 1.0 Imagehttp://www.w3.org/ns/ttml/profile/imsc1/image
    - -
    Note

    As specified in 6.11 Features and Extensions, the presence of the ttp:profile attribute is - not required by this profile. The profile designator specified above is intended to be generally used to signal conformance - of a Document Instance to the profile. The details of such signaling depend on the application, and can, for instance, use - metadata structures out-of-band of the Document Instance.

    -
    - -
    -

    8.2 Presented Image

    - -
    -

    8.2.1 Definition

    - -

    A presented image is a div element with a smpte:backgroundImage attribute that flows into a presented region.

    -
    - -
    -

    8.2.2 Constraints

    - -

    In a given intermediate synchronic document, each presented region SHALL contain at most one div element, which SHALL be a presented image.

    - -
    - -
    -

    8.2.3 Intermediate Synchronic Document Construction

    - -

    For the purposes of constructing an intermediate synchronic document, a div element with a smpte:backgroundImage attribute SHALL NOT be considered empty.

    - -
    -
    - -
    -

    8.3 smpte:backgroundImage Constraints

    - -

    If a smpte:backgroundImage attribute is applied to a div element:

    - -
      -
    • the width and height (in pixels) of the image source referenced by smpte:backgroundImage SHALL be equal - to the width and height (as specified by the tts:extent attribute using px units) of the region in which the - div element is presented;
    • - -
    • the div element SHOULD contain a metadata element containing an ittm:altText element that is a Text Alternative of the image referenced by the smpte:backgroundImage attribute; and
    • - -
    • The smpte:backgroundImage attribute SHALL reference a PNG datastream as specified in [PNG]. If a pHYs chunk is present, it SHALL indicate square pixels. Note that if no pixel aspect ratio is carried, the default of square pixels is assumed.
    • -
    - - -
    Note

    In [TTML1], tts:extent and tts:origin do not apply to div elements. In order to individually position multiple div elements, each div can be associated with a distinct region with the desired tts:extent and tts:origin.

    -
    - -
    -

    8.4 Features and Extensions

    - -

    See 4. Conformance for a definition of permitted and prohibited.

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    FeatureDispositionAdditional provisions
    Relative to the TT Feature namespace
    #backgroundColor-inlineprohibited
    #backgroundColorSee individual disposition of #backgroundColor-inline, #backgroundColor-region and #backgroundColor-block.
    #bidiSee individual disposition of #direction, #unicodeBidi and #writingMode-horizontal.
    #colorprohibited
    #contentpermittedThe p, span and br elements SHALL NOT be present. See Section 8.2.2 Constraints for constraints on div elements.
    #directionprohibited
    #displayAlignprohibited
    #extent-regionpermittedThe tts:extent attribute SHALL be present on all region elements, where it - SHALL use px units.
    #fontFamilyprohibited
    #fontFamily-genericprohibited
    #fontFamily-non-genericprohibited
    #fontSizeprohibited
    #fontSize-anamorphicprohibited
    #fontSize-isomorphicprohibited
    #fontStyleprohibited
    #fontStyle-italicprohibited
    #fontStyle-obliqueprohibited
    #fontWeightprohibited
    #fontWeight-boldprohibited
    #length-emprohibited
    #lineBreak-uax14No processor requirement is specified.
    #lineHeightprohibited
    #nested-divprohibited
    #nested-spanprohibited

    NOTE: The prohibition of span elements by this profile implies the prohibition of this feature.

    #paddingprohibited
    #padding-1prohibited
    #padding-2prohibited
    #padding-3prohibited
    #padding-4prohibited
    #textAlignprohibited
    #textAlign-absoluteprohibited
    #textAlign-relativeprohibited
    #textDecorationprohibited
    #textDecoration-overprohibited
    #textDecoration-throughprohibited
    #textDecoration-underprohibited
    #textOutlineprohibited
    #textOutline-blurredprohibited
    #textOutline-unblurredprohibited
    #unicodeBidiprohibited
    #visibilitySee individual disposition of #visibility-inline, - #visibility-region and #visibility-block.
    #visibility-inlineprohibited
    #wrapOptionprohibited
    #writingModeSee individual disposition of #writingMode-vertical and - #writingMode-horizontal.
    #writingMode-verticalprohibited
    ExtensionDispositionProvisions
    Relative to the SMPTE-TT Extension namespace
    #imagepermitted
      -
    • smpte:backgroundImage MAY be used according to 8.3 smpte:backgroundImage Constraints with the semantics of the attribute defined by Section 5.5.2 of [ST2052-1].
    • -
    • smpte:backgroundImageHorizontal and smpte:backgroundImageVertical SHALL NOT be used.
    • -
    • smpte:image SHALL NOT be used.
    -
    -
    - -
    -

    9. Hypothetical Render Model

    - -
    - -

    9.1 Overview (non-normative)

    - -

    This Section specifies the Hypothetical Render Model illustrated in Fig. 1 - Hypothetical Render Model - .

    - -

    The purpose of the model is to limit Document Instance complexity. It is not intended as a specification of the - processing requirements for implementations. For instance, while the model defines a glyph buffer for the purpose of - limiting the number of glyphs displayed at any given point in time, it neither requires the implementation of such a - buffer, nor models the sub-pixel character positioning and anti-aliased glyph rendering that can be used to produce text - output.

    - -
    - Hypothetical Render Model - -
    Fig. 1 - Hypothetical Render Model -
    -
    - -

    The model operates on successive intermediate synchronic documents - obtained from an input Document Instance, and uses a simple double buffering model: while an intermediate synchronic - document En is being painted into Presentation Buffer Pn (the "front buffer" of the model), the - previous intermediate synchronic document En-1 is available for display in Presentation Buffer - Pn-1 (the "back buffer" of the model).

    - -

    The model specifies a (hypothetical) time required for completely painting an intermediate synchronic document - as a proxy for complexity. Painting includes drawing region backgrounds, rendering and copying glyphs, and decoding and - copying images. Complexity is then limited by requiring that painting of intermediate synchronic document - En completes before the end of intermediate synchronic document En-1.

    - -

    Whenever applicable, constraints are specified relative to root container dimensions, allowing subtitle sequences to be - authored independently of Related Video Object resolution.

    - -

    To enable scenarios where the same glyphs are used in multiple successive intermediate synchronic documents, e.g. to convey a CEA-608/708-style roll-up (see - [CEA-608] and [CEA-708]), the Glyph Buffers Gn and Gn-1 store rendered glyphs across intermediate synchronic documents, allowing glyphs to be copied into the - Presentation Buffer instead of rendered, a more costly operation.

    - -

    Similarly, Decoded Image Buffers Dn and Dn-1 store decoded images across intermediate synchronic documents, allowing images to be copied into the Presentation Buffer instead of decoded.

    -
    - -
    -

    9.2 General

    - -

    The Presentation Compositor SHALL render in Presentation Buffer Pn each successive intermediate synchronic - document En using the following steps in order:

    - -
      -
    1. clear the pixels, except for the first intermediate synchronic document E0 for which the pixels - of P0 SHALL be assumed to have been cleared; -
    2. - -
    3. paint, according to stacking order, all background pixels for each region;
    4. - -
    5. paint all pixels for background colors associated with text or image subtitle content; and
    6. - -
    7. paint the text or image subtitle content.
    8. -
    - -

    The Presentation Compositor SHALL start rendering En:

    - -
      -
    • at the presentation time of E0 minus Initial Painting Delay (IPD), if n = 0; or
    • - -
    • at the presentation time of En-1, if n > 0.
    • -
    - -

    The duration DUR(En) for painting an intermediate synchronic document En in the - Presentation Buffer Pn SHALL be:

    - -

    $DUR(E_n) = S(E_n) / BDraw + DUR_T(E_n) + DUR_I(E_n)$

    - -

    where

    - - - -

    The contents of the Presentation Buffer Pn SHALL be transferred instantaneously to Presentation Buffer - Pn-1 at the presentation time of intermediate synchronic document En, making the latter - available for display.

    - -
    Note

    It is possible for the contents of Presentation Buffer Pn-1 to never be displayed. This can - happen if Presentation Buffer Pn is copied twice to Presentation Buffer Pn-1 between two consecutive - video frame boundaries of the Related Video Object.

    - -

    It SHALL be an error for the Presentation Compositor to fail to complete painting pixels for En before the - presentation time of En.

    - -

    Unless specified otherwise, the following table SHALL specify values for IPD and BDraw.

    - - - - - - - - - - - - - - - - - - - - - - - -
    ParameterInitial value
    Initial Painting Delay (IPD)1 s
    Normalized background drawing performance factor (BDraw)12 s-1
    - -
    Note

    BDraw effectively sets a limit on filling regions - for example, assuming that the root container is ultimately rendered at 1920×1080 resolution, a BDraw of $12\,\mathrm{s}^{-1}$ would correspond to a fill rate of $1920 \times 1080 \times 12\,\mathrm{s}^{-1} = 23.7 \times 2^{20}$ pixels $\mathrm{s}^{-1}$.

    - -
    Note

    IPD effectively sets a limit on the complexity of any given intermediate synchronic document.

    -
    - -
    -

    9.3 Paint Regions

    - -

    The total normalized drawing area S(En) for intermediate synchronic document En SHALL - be

    - -

    $S(E_n) = CLEAR(E_n) + PAINT(E_n)$

    - -

    where CLEAR(E0) = 0 and CLEAR(En | n > 0) = 1, i.e. the root container in its entirety.

    - -
    Note

    To ensure consistency of the Presentation Buffer, a new intermediate synchronic document requires - clearing of the root container.

    - -

    PAINT(En) SHALL be the normalized area to be painted for all regions that are used in intermediate synchronic - document En according to:

    - -

    $PAINT(E_n) = \sum_{R_i \in R_p} NSIZE(R_i) \cdot NBG(R_i)$

    - -

    where R_p SHALL be the set of presented regions in the intermediate synchronic - document En.

    - -

    NSIZE(Ri) SHALL be given by:

    - -

    $NSIZE(R_i) = (\text{width of } R_i \cdot \text{height of } R_i) \div (\text{root container height} \cdot \text{root container width})$

    - - - -

    NBG(Ri) SHALL be the total number of tts:backgroundColor attributes associated with the given - region Ri in the intermediate synchronic document. A tts:backgroundColor attribute is - associated with a region when it is explicitly specified (either as an attribute in the element, or by reference to a - declared style) in the following circumstances:

    - -
      -
    • it is specified on the region layout element that defines the region; or
    • - -
    • it is specified on a div, p, span or br content element that is - to be flowed into the region for presentation in the intermediate synchronic document (see [TTML1] for more - details on when a content element is flowed into a region); or -
    • - -
    • it is specified on a set animation element that is to be applied to content elements that are to be - flowed into the region for presentation in the intermediate synchronic document (see [TTML1] for more details - on when a set animation element is applied to content elements). -
    • -
    - -

    Even if a specified tts:backgroundColor is the same as specified on the nearest ancestor content element or - animation element, specifying any tts:backgroundColor SHALL require an additional fill operation for all - region pixels.

    -
    - -
    -

    9.4 Paint Images

    - -

    The Presentation Compositor SHALL paint into the Presentation Buffer Pn all visible pixels of presented - images of intermediate synchronic document En.

    - -

    For each presented image, the Presentation Compositor SHALL either:

    - -
      -
    • if an identical image is present in Decoded Image Buffer Dn, copy the image from Decoded Image Buffer - Dn to the Presentation Buffer Pn using the Image Copier; or
    • - -
    • if an identical image is present in Decoded Image Buffer Dn-1, i.e. an identical image was present in - intermediate synchronic document En-1, copy using the Image Copier the image from Decoded Image Buffer - Dn-1 to both the Decoded Image Buffer Dn and the Presentation Buffer Pn; or -
    • - -
    • otherwise, decode the image using the Image Decoder into the Presentation Buffer Pn and Decoded Image Buffer Dn.
    • -
    - -

    Two images SHALL be identical if and only if they reference the same encoded image source.

    - -

    The duration DURI(En) for painting images of an intermediate synchronic document - En in the Presentation Buffer SHALL be as follows:

    - -

    $\mathrm{DURI}(E_n) = \sum_{I_i \in I_c} \dfrac{\mathrm{NRGA}(I_i)}{\mathrm{ICpy}} + \sum_{I_j \in I_d} \dfrac{\mathrm{NSIZ}(I_j)}{\mathrm{IDec}}$

    - -

    where

    - - - -

    NRGA(Ii) is the Normalized Image Area of presented image Ii and SHALL be equal to:

    - -

    $\mathrm{NRGA}(I_i) = \dfrac{\text{width of } I_i \cdot \text{height of } I_i}{\text{root container height} \cdot \text{root container width}}$

    - -

    NSIZ(Ii) SHALL be the number of pixels of presented image Ii.

    - -

    The contents of the Decoded Image Buffer Dn SHALL be transferred instantaneously to Decoded Image Buffer - Dn-1 at the presentation time of intermediate synchronic document En.

    - -

    The total size occupied by images stored in Decoded Image Buffers Dn or Dn-1 SHALL be the sum of - their Normalized Image Area.

    - -

    The size of Decoded Image Buffers Dn or Dn-1 SHALL be the Normalized Decoded Image Buffer Size - (NDIBS).

    - -

    Unless specified otherwise, the following table SHALL specify ICpy, IDec, and NDIBS.

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Parameter | Initial value
    Normalized image copy performance factor (ICpy) | 6
    Image Decoding rate (IDec) | $1 \times 2^{20}$ pixels s$^{-1}$
    Normalized Decoded Image Buffer Size (NDIBS) | 0.9885
    -
    - -
    -

    9.5 Paint Text

    - -

    In the context of this section, a glyph is a tuple consisting of (i) one character and (ii) the computed values of the following - style properties:

    - -
      -
    • tts:color
    • - -
    • tts:fontFamily
    • - -
    • tts:fontSize
    • - -
    • tts:fontStyle
    • - -
    • tts:fontWeight
    • - -
    • tts:textDecoration
    • - -
    • tts:textOutline
    • -
    - - -
    Note

    While one-to-one mapping between characters and typographical glyphs is generally the rule in some scripts, - e.g. latin script, it is the exception in others. For instance, in arabic script, a character can - yield multiple glyphs depending on its position in a word. The Hypothetical Render Model - always assumes a one-to-one mapping, but reduces the performance of the glyph buffer for scripts where one-to-one mapping - is not the general rule (see GCpy below).

    - -

    For each glyph associated with a character in a presented region of intermediate synchronic document En, - the Presentation Compositor SHALL:

    - -
      -
    • if an identical glyph is present in Glyph Buffer Gn, copy the glyph from Glyph Buffer Gn to the - Presentation Buffer Pn using the Glyph Copier; or
    • - -
    • if an identical glyph is present in Glyph Buffer Gn-1, i.e. an identical glyph was present in intermediate - synchronic document En-1, copy using the Glyph Copier the glyph from Glyph Buffer Gn-1 to both the - Glyph Buffer Gn and the Presentation Buffer Pn; or
    • - -
    • otherwise render using the Glyph Renderer the glyph into the Presentation Buffer Pn and Glyph Buffer - Gn.
    • -
    - -
    - Example of Presentation Compositor Behavior for Text Rendering - -
    Fig. 2 - Example of Presentation Compositor Behavior for Text Rendering -
    -
    - -

    The duration DURT(En) for rendering the text of an intermediate synchronic document - En in the Presentation Buffer is as follows:

    - -

    $\mathrm{DURT}(E_n) = \sum_{g_i \in \Gamma_r} \dfrac{\mathrm{NRGA}(g_i)}{\mathrm{Ren}(g_i)} + \sum_{g_j \in \Gamma_c} \dfrac{\mathrm{NRGA}(g_j)}{\mathrm{GCpy}}$

    - -

    where

    - - - -

    The Normalized Rendered Glyph Area NRGA(gi) of a glyph gi SHALL be equal to:

    - -

    $\mathrm{NRGA}(g_i) = (\text{fontSize of } g_i \text{ as percentage of root container height})^2$

    - -
    Note

    NRGA(gi) does not take into account decorations (e.g. underline), effects (e.g. outline) or actual typographical glyph aspect ratio. An implementation can determine its actual buffer size needs based on worst-case glyph size complexity.

    - -

    The contents of the Glyph Buffer Gn SHALL be copied instantaneously to Glyph Buffer Gn-1 at the - presentation time of intermediate synchronic document En.

    - -

    It SHALL be an error for the sum of NRGA(gi) over all glyphs in Glyph Buffer Gn to be larger than the Normalized Glyph Buffer Size (NGBS).

    - -

    Unless specified otherwise, the following table specifies values of GCpy, Ren and NGBS.

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Normalized glyph copy performance factor (GCpy)
    Script property (see Standard Annex #24 at [UNICODE]) for the character of gi | GCpy
    latin, greek, cyrillic, hebrew or common | 12
    any other value | 3
    Text rendering performance factor Ren(gi)
    Block property (see [UNICODE]) for the character of gi | Ren(gi)
    CJK Unified Ideograph | 0.6
    any other value | 1.2
    Normalized Glyph Buffer Size (NGBS)
    1
    - -
    Note

    The choice of font by the presentation processor can increase rendering complexity. - For instance, a cursive font can generally result in a given character yielding different typographical glyphs depending - on context, even if latin script is used.

    - - - - - - -
    -
    - - -
    -

    A. Reference Fonts

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    Computed Font Family | Code Points | Reference Font
    monospaceSerif | All code points specified in B. Recommended Character Sets | Courier New or Liberation Mono
    proportionalSansSerif | All code points specified in B. Recommended Character Sets, excluding the code points defined for Hebrew and Arabic scripts. | Arial or Helvetica or Liberation Sans
    - -
    - - - - -
    -

    C. Forced content (non-normative)

    - -

    Fig. 3 (Illustration of the use of itts:forcedDisplay) below illustrates the use of forced content, i.e. itts:forcedDisplay and displayForcedOnlyMode. The content with itts:forcedDisplay="true" is the French translation of the "High School" sign. The content with itts:forcedDisplay="false" consists of French subtitles capturing a voiceover.

    - -
    - Illustration of the use of itts:forcedDisplay - -
    Fig. 3 - Illustration of the use of itts:forcedDisplay -
    -
    - -

    When the user selects French as the playback language but does not select French subtitles, - displayForcedOnlyMode is set to "true", causing the display of the sign translation, which is useful to any French - speaker, but hiding the voiceover subtitles as the voiceover is heard in French.

    - -

    If the user selects French as the playback language and also selects French subtitles, e.g. if the user is hard-of-hearing, - displayForcedOnlyMode is set to "false", causing the display of both the sign translation and the voiceover - subtitles.

    - -

    The algorithm for setting the displayForcedOnlyMode parameter and selecting the appropriate combination of - subtitle and audio tracks depends on the application.

    -
    - -
    -

    D. WCAG Considerations

    - -

    In order to meet the guidelines in [WCAG20], the following considerations apply.

    - -

    Guideline 1.1 of [WCAG20] recommends that an implementation provide Text Alternatives for all non-text content. In the - context of this specification, this Text Alternative is intended primarily to support users of the subtitles who cannot see - images. Since the images of an Image Profile Document Instance usually represent subtitle or caption text, the - guidelines for authoring text equivalent strings given at Images of text of [HTML5] are - appropriate.

    - -

    Thus, for each subtitle in an Image Profile Document Instance, a text equivalent content in a Text Profile - Document Instance SHOULD be written so that it conveys all essential content and fulfills the same function as the - corresponding subtitle image. In the context of subtitling and captioning, this content will be (as a minimum) the verbatim - equivalent of the image without précis or summarization. However, the author MAY include extra information to the text - equivalent string in cases where styling is applied to the text image with a deliberate connotation, as a functional - replacement for the applied style.

    - -

    For instance, in subtitling and captioning, italics can be used to indicate an off screen speaker context (for example a - voice from a radio). An author can choose to include this functional information in the text equivalent; for example, by - including the word "Radio: " before the image equivalent text. Note that images in an Image Profile - Document Instance that are intended for use as captions, i.e. intended for a hard of hearing audience, might already - include this functional information in the rendered text.

    - -

    Guideline 1.1 of [WCAG20] also recommends that accessible Text Alternatives must be "programmatically determinable." This - means that the text must be able to be read and used by the assistive technologies (and the accessibility features in browsers) - that people with disabilities use. It also means that the user must be able to use their assistive technology to find the - alternative text (that they can use) when they land on the non-text content (that they can't use).

    -
    - -
    -

    E. Sample Document Instance (non-normative)

    - -

    The following sample Document Instances conform to the Text Profile and Image Profile, respectively. These samples are for illustration only, and are neither intended to capture current or future practice, nor to exercise all normative prose contained in this specification.

    - -
    Example 10
    <?xml version="1.0" encoding="UTF-8"?>
    -<tt xml:lang="en"
    -    xmlns="http://www.w3.org/ns/ttml"
    -    xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
    -    xmlns:tts="http://www.w3.org/ns/ttml#styling"
    -    xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
    -    xmlns:ittp="http://www.w3.org/ns/ttml/profile/imsc1#parameter"
    -    ittp:aspectRatio="4 3"
    -    ttp:profile="http://www.w3.org/ns/ttml/profile/imsc1/text">
    -
    -    <head>
    -        <layout>
    -            <region xml:id="area1" tts:origin="10% 10%" tts:extent="80% 10%" tts:backgroundColor="black" tts:displayAlign="center" tts:color="red"/>
    -        </layout>
    -    </head>
    -    <body>
    -        <div>
    -            <p region="area1" begin="0s" end="6s">Lorem ipsum dolor sit amet.</p>
    -        </div>
    -    </body>
    -</tt>
    - - -
    Example 11
    <?xml version="1.0" encoding="UTF-8"?>
    -<tt xml:lang="fr"
    -    xmlns="http://www.w3.org/ns/ttml"
    -    xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
    -    xmlns:tts="http://www.w3.org/ns/ttml#styling"
    -    xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
    -    xmlns:smpte="http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt"
    -    xmlns:itts="http://www.w3.org/ns/ttml/profile/imsc1#styling"
    -    tts:extent="640px 480px"
    -    ttp:frameRate="25"
    -    ttp:profile="http://www.w3.org/ns/ttml/profile/imsc1/image">
    -
    -    <head>
    -        <layout>
    -            <region xml:id="region1" tts:origin="120px 410px" tts:extent="240px 40px" tts:showBackground="whenActive"/>
    -            <region xml:id="region2" tts:origin="120px 20px" tts:extent="240px 40px" tts:showBackground="whenActive"/>
    -        </layout>
    -    </head>
    -    <body>
    -        <div region="region1" begin="00:00:01:00" end="00:00:02:00" smpte:backgroundImage="1.png"/>
    -        <div region="region1" begin="00:00:03:20" end="00:00:04:12" smpte:backgroundImage="2.png"/>
    -        <div region="region2" itts:forcedDisplay="true" begin="00:00:03:20" end="00:00:04:12" smpte:backgroundImage="3.png"/>
    -    </body>
    -</tt>
    -
    - -
    -

    F. Extensions

    - -
    -

    F.1 General

    - -

    The following sections define extension designations, expressed as relative URIs (fragment identifiers) relative to the - IMSC 1.0 Extension Namespace base URI.

    -
    - -
    -

    F.2 #progressivelyDecodable

    - -

    A transformation processor supports the #progressivelyDecodable feature if it recognizes and is capable of transforming values of the ittp:progressivelyDecodable attribute.

    - -

    A presentation processor supports the #progressivelyDecodable feature if it implements presentation - semantic support for values of the ittp:progressivelyDecodable - attribute.

    -
    - -
    -

    F.3 #aspectRatio

    - -

    A transformation processor supports the #aspectRatio feature if it recognizes and is capable of transforming values of the ittp:aspectRatio attribute.

    - -

    A presentation processor supports the #aspectRatio feature if it implements presentation semantic - support for values of the ittp:aspectRatio attribute.

    -
    - -
    -

    F.4 #forcedDisplay

    - -

    A transformation processor supports the #forcedDisplay feature if it recognizes and is capable of transforming values of the itts:forcedDisplay attribute.

    - -

    A presentation processor supports the #forcedDisplay feature if it implements presentation semantic - support for values of the itts:forcedDisplay attribute.

    -
    - -
    -

    F.5 #altText

    - -

    A transformation processor supports the #altText feature if it recognizes and is capable of - transforming values of the ittm:altText element.

    - -

    A presentation processor supports the #altText feature if it implements presentation semantic support - for values of the ittm:altText element.

    -
    - -
    -

    F.6 #linePadding

    - -

    A transformation processor supports the #linePadding feature if it recognizes and is capable of - transforming values of the ebutts:linePadding attribute specified in [EBU-TT-D].

    - -

    A presentation processor supports the #linePadding feature if it implements presentation semantic - support for values of the ebutts:linePadding attribute specified in [EBU-TT-D].

    -
    - -
    -

    F.7 #multiRowAlign

    - -

    A transformation processor supports the #multiRowAlign feature if it recognizes and is capable of - transforming values of the ebutts:multiRowAlign attribute specified in [EBU-TT-D].

    - -

    A presentation processor supports the #multiRowAlign feature if it implements presentation semantic - support for values of the ebutts:multiRowAlign attribute specified in [EBU-TT-D].

    +
    + [CONTENT]
    -
    - -
    -

    G. XML Schema Definitions (non-normative)

    - -

    XML Schema definitions (see [xmlschema-1]) for extension vocabulary defined - by this specification are provided here for convenience.

    - -

    These definitions are non-normative and are not sufficient to validate conformance of a Document Instance.

    - -

    In any case where a definition specified by this appendix diverges from the prose of the specification, the latter takes precedence.

    - - -
    - - -
    -

    H. Extensibility Objectives (non-normative)

    - -

    This section documents extensibility objectives for this specification.

    - -

    This specification is intended to allow:

    -
      -
    • other profiles of TTML and future revisions of this specification to specify support for documents and/or processors conforming to Text Profile or Image Profile, in addition to specifying additional extensions;
    • -
    • subject to the structural requirements of [TTML1], content from external namespaces to be present in a document that conforms to Text Profile or Image Profile (a) without affecting transformation or presentation, and (b) to be carried through by a transformation processor (see 6.2 Foreign Element and Attributes);
    • -
    • a document that conforms to Text Profile or Image Profile to be embedded in other XML documents.
    • -
    -
    - -
    -

    I. Compatibility with other TTML-based specifications (non-normative)

    - -
    -

    I.1 Overview

    - -

    This specification is designed to be compatible with [ST2052-1], [EBU-TT-D] and [ttml10-sdp-us]. - Specifically, it is possible to create a document that:

    - - -

    This specification is also intended to allow straightforward conversion of a document that conforms to the text or image profiles of [CFF] to the Text Profile or Image Profile, respectively.

    - -
    - - -
    -

    I.2 EBU-TT-D

    - -

    The Text Profile is a strict syntactic superset of [EBU-TT-D].

    - -

    A document that conforms to [EBU-TT-D] therefore generally also conforms to the Text Profile, with a few exceptions, including:

    - - -

    Note that the ttp:profile attribute is not allowed by [EBU-TT-D], and the ebuttm:conformsToStandard element is used instead to signal Text Profile, as specified in 6.9 Profile Signaling.

    - -

    It is not possible for a document that conforms to [EBU-TT-D] to also conform to Image Profile, and vice-versa, notwithstanding the special case where the document also conforms to Text Profile as noted at 5. Profiles.

    - -

    The following is an example of a document that conforms to both Text Profile and [EBU-TT-D]. Note the presence of two ebuttm:conformsToStandard elements, one of which equals the Text Profile designator:

    - -
    Example 12
    <?xml version="1.0" encoding="UTF-8"?>
    -<tt xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    -    xmlns="http://www.w3.org/ns/ttml" xmlns:ttp="http://www.w3.org/ns/ttml#parameter"
    -    xmlns:tts="http://www.w3.org/ns/ttml#styling" xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
    -    xmlns:ebutts="urn:ebu:tt:style" xml:lang="en" ttp:timeBase="media" xmlns:ebuttm="urn:ebu:tt:metadata" >
    -    <head>
    -        <metadata>
    -            <ebuttm:documentMetadata>
    -                <ebuttm:conformsToStandard>urn:ebu:tt:distribution:2014-01</ebuttm:conformsToStandard>
    -                <ebuttm:conformsToStandard>http://www.w3.org/ns/ttml/profile/imsc1/text</ebuttm:conformsToStandard>
    -            </ebuttm:documentMetadata>
    -        </metadata>
    -        <styling>
    -            <style xml:id="baseStyle" tts:color="#FFFFFF" tts:lineHeight="100%"/>
    -            <style xml:id="blackBackground" tts:backgroundColor="#000000"/>
    -            <style xml:id="greenBackground" tts:backgroundColor="#00FF00"/>
    -            <style xml:id="startEnd" tts:textAlign="start" ebutts:multiRowAlign="end"/>
    -            <style xml:id="centerStart" tts:textAlign="center" ebutts:multiRowAlign="start"/>
    -        </styling>
    -        <layout>
    -            <region xml:id="area1" tts:origin="15% 10%" tts:extent="70% 20%" style="greenBackground" tts:displayAlign="center"/>
    -            <region xml:id="area2" tts:origin="15% 70%" tts:extent="70% 20%" style="blackBackground" tts:displayAlign="center"/>
    -        </layout>
    -    </head>
    -    <body>
    -        <div style="baseStyle">
    -            <p xml:id="s1" region="area1" style="startEnd" begin="00:00:01" end="00:00:09">
    -                multiRowAlign="end"<br/>textAlign="start"
    -            </p>
    -            <p xml:id="s2" region="area2" style="centerStart" begin="00:00:01" end="00:00:09">
    -                multiRowAlign="start"<br/>textAlign="center"
    -            </p>
    -        </div>
    -    </body>
    -</tt>
    - -
    - -
    -

    I.3 SDP-US

    - -

    The Text Profile is a strict syntactic superset of [ttml10-sdp-us].

    - -

    A document that conforms to [ttml10-sdp-us] therefore also generally conforms to the Text Profile, with a few exceptions, including:

    -
      -
    • [ttml10-sdp-us] does not constrain document complexity using an HRM.
    • -
    - -

    [ttml10-sdp-us] requires a specific value of the use attribute of the ttp:profile element. As a result, Text Profile is not signaled using the ttp:profile attribute. Instead, as specified in 5.4 Profile Resolution Semantics, the Text Profile can be signaled by the Document Interchange Context and/or the Document Processing Context. Alternatively, a processor can choose to process a document as a Text Profile document if the ttp:profile element signals [ttml10-sdp-us], since [ttml10-sdp-us] is feasibly interoperable with Text Profile.

    - - - -

    It is not possible for a document that conforms to [ttml10-sdp-us] to also conform to Image Profile, and vice-versa, notwithstanding the special case where the document also conforms to Text Profile as noted at 5. Profiles.

    - -

    As an illustration, Example 3 at [ttml10-sdp-us] conforms to both Text Profile and [ttml10-sdp-us].

    - -
    - -
    -

    I.4 SMPTE-TT (SMPTE ST 2052-1)

    - -

    [ST2052-1] specifies the use of the DFXP Full Profile (see Appendix F.3 at [TTML1]) supplemented by a number of extensions, including http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt#image.

    - -

    This specification defines practical constraints on [ST2052-1], supplemented by a few extensions defined at F. Extensions. These constraints and extensions are intended to reflect industry practice.

    - -

    As a result, particular care is required when creating a document intended to be processed according to both [ST2052-1] and Text Profile or Image Profile. In particular:

    -
      - -
    • in contrast to Text Profile and Image Profile, [ST2052-1] allows documents to contain both smpte:backgroundImage attributes and any of p, span, or br elements;
    • - -
    • Image Profile allows only a subset of the http://www.smpte-ra.org/schemas/2052-1/2010/smpte-tt#image extension;
    • - -
    • [ST2052-1] does not support the #aspectRatio, #forcedDisplay, - #linePadding and #multiRowAlign extensions that impact presentation; and
    • - -
    • when the designator "http://www.smpte-ra.org/schemas/2052-1/2010/profiles/smpte-tt-full" is used as a value for ttp:profile element or attribute (see Section 5.8 at [ST2052-1]), Text Profile or Image Profile is signaled by the Document Interchange Context and/or the Document Processing Context.
    • - -
    - - -

    The following is an example of a document that conforms to both Text Profile and [ST2052-1]:

    - -
    Example 13
    <?xml version="1.0" encoding="UTF-8"?>
    -<tt xml:lang="en" xmlns="http://www.w3.org/ns/ttml" xmlns:ttm="http://www.w3.org/ns/ttml#metadata"
    -    xmlns:ttp="http://www.w3.org/ns/ttml#parameter" ttp:profile="http://www.smpte-ra.org/schemas/2052-1/2010/profiles/smpte-tt-full"
    -    xmlns:tts="http://www.w3.org/ns/ttml#styling" ttp:frameRate="24">
    -    <head>
    -        <layout>
    -            <region xml:id="area1" tts:origin="10% 70%" tts:extent="80% 20%" tts:showBackground="whenActive" tts:backgroundColor="red" tts:displayAlign="center" tts:color="white"/>
    -        </layout>
    -    </head>
    -    <body tts:lineHeight="100%">
    -        <div>
    -            <p region="area1" begin="00:00:01.01" end="00:00:03">This should appear on frame 25.</p>
    -            <p region="area1" begin="00:00:04" end="00:00:06">This should appear on frame 96.</p>
    -            <p region="area1" begin="00:00:07.33" end="00:00:09">This should appear on frame 176.</p>
    -        </div>
    -    </body>
    -</tt>
    - -
    - -
    -

    I.5 CFF-TT

    - -

    This specification was derived from the text and image profiles - specified in Section 6 at [CFF], and is intended to be a superset in - terms of capabilities. Additional processing is however generally necessary to - convert a document from [CFF] to this specification. In particular:

    - -
      - -
    • the namespace of the progressivelyDecodable attribute is different;
    • - -
    • the forcedDisplayMode attribute in [CFF] is renamed to - forcedDisplay in this specification;
    • - -
    • the [CFF] HRM does not specify GCpy as a function of script;
    • - -
    • in [CFF], the attribute ttp:frameRate is not subject to the requirements specified at 6.11 Features and Extensions; and
    • - -
    • [CFF] requires the use of the ttp:profile element, whereas this - specification recommends the use of the ttp:profile attribute.
    • - -
    - -
    - -
    - - -

    J. References

    J.1 Normative references

    [CLDR]
    Unicode Consortium. The Common Locale Data Repository Project -
    [EBU-TT-D]
    European Broadcasting Union (EBU). Tech 3380, EBU-TT-D Subtitling Distribution Format Version 1.0 -
    [PNG]
    Tom Lane. Portable Network Graphics (PNG) Specification (Second Edition). 10 November 2003. W3C Recommendation. URL: http://www.w3.org/TR/PNG -
    [RFC2119]
    S. Bradner. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://tools.ietf.org/html/rfc2119 -
    [ST2052-1]
    SMPTE ST 2052-1, Timed Text Format (SMPTE-TT) URL: https://www.smpte.org/standards -
    [TTML1]
    Glenn Adams. Timed Text Markup Language 1 (TTML1) (Second Edition). 24 September 2013. W3C Recommendation. URL: http://www.w3.org/TR/ttml1/ -
    [UNICODE]
    The Unicode Standard. URL: http://www.unicode.org/versions/latest/ -
    [WCAG20]
    Ben Caldwell; Michael Cooper; Loretta Guarino Reid; Gregg Vanderheiden et al. Web Content Accessibility Guidelines (WCAG) 2.0. 11 December 2008. W3C Recommendation. URL: http://www.w3.org/TR/WCAG20/ -
    [ttml10-sdp-us]
    Glenn Adams; Monica Martin; Sean Hayes. TTML Simple Delivery Profile for Closed Captions (US). 5 February 2013. W3C Note. URL: http://www.w3.org/TR/ttml10-sdp-us/ -
    [xml-names]
    Tim Bray; Dave Hollander; Andrew Layman; Richard Tobin; Henry Thompson et al. Namespaces in XML 1.0 (Third Edition). 8 December 2009. W3C Recommendation. URL: http://www.w3.org/TR/xml-names -

    J.2 Informative references

    [CEA-608]
    Line-21 Data Services, ANSI/CEA Standard. -
    [CEA-708]
    Digital Television (DTV) Closed Captioning, ANSI/CEA Standard. -
    [CFF]
    Digital Entertainment Content Ecosystem (DECE). Common File Format & Media Formats Specification (CFF) Version 2.2. -
    [HTML5]
    Ian Hickson; Robin Berjon; Steve Faulkner; Travis Leithead; Erika Doyle Navara; Edward O'Connor; Silvia Pfeiffer. HTML5. 28 October 2014. W3C Recommendation. URL: http://www.w3.org/TR/html5/ -
    [SUBM]
    World Wide Web Consortium (W3C). TTML Text and Image Profiles for Internet Media Subtitles and Captions (Member Submission, 07 June 2013) -
    [namespaceState]
    Norman Walsh. The Disposition of Names in an XML Namespace. 29 March 2006. W3C Working Draft. URL: http://www.w3.org/TR/namespaceState/ -
    [xmlschema-1]
    Henry Thompson; David Beech; Murray Maloney; Noah Mendelsohn et al. XML Schema Part 1: Structures Second Edition. 28 October 2004. W3C Recommendation. URL: http://www.w3.org/TR/xmlschema-1/ -
    + From a8e2efd6517a69ce5b17ccf48817e2fceef7f057 Mon Sep 17 00:00:00 2001 From: tripu Date: Mon, 21 Mar 2016 15:38:01 +0900 Subject: [PATCH 16/23] Test code and samples: add deliverer IDs stuff --- test/rules.js | 47 ++++++++++++++++++++++++++++++++++++++++++++++- test/samples.json | 6 ++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/test/rules.js b/test/rules.js index 2ecf1bad8..206c978ac 100644 --- a/test/rules.js +++ b/test/rules.js @@ -6,6 +6,7 @@ const DEBUG = false , META_PROFILE = 'profile' , META_DELIVERERS = 'deliverers' +, META_DELIVERER_IDS = 'delivererIDs' ; // Native packages: @@ -47,7 +48,33 @@ const equivalentDeliverers = function(a1, a2) { } } return (found === a1.length); - } else { + } + else { + return false; + } +}; + +/** + * Compare two arrays of "deliverer IDs" and check that they're equivalent. + * + * @param {Array} a1 - One array. + * @param {Array} a2 - The other array. + * @returns {Boolean} whether the two arrays contain exactly the same integers. + */ + +const equivalentDelivererIDs = function(a1, a2) { + if (a1 && a2 && a1.length === a2.length) { + var found = 0; + for(var i = 0; i < a1.length; i ++) { + for(var j = 0; j < a2.length && found === i; j ++) { + if (a1[i] === a2[j]) { + found++; + } + } + } + return (found === a1.length); + } + else { return false; } }; @@ -90,6 +117,18 @@ const compareMetadata = function(url, file, type, expectedValue) { specberus.extractMetadata(opts); }); } + else if (META_DELIVERER_IDS === type) { + it('Should find deliverer IDs of ' + (url ? 
url : file), function (done) { + handler.on('end-all', function () { + chai(specberus).to.have.property('meta').to.have.property('detectedDelivererIDs'); + chai(specberus.meta.detectedDelivererIDs).to.satisfy(function(found) { + return equivalentDelivererIDs(found, expectedValue); + }); + done(); + }); + specberus.extractMetadata(opts); + }); + } }; @@ -112,6 +151,9 @@ describe('Basics', function() { for(var i in samples) { compareMetadata(samples[i].url, null, META_DELIVERERS, samples[i].deliverers); } + for(var i in samples) { + compareMetadata(samples[i].url, null, META_DELIVERER_IDS, samples[i].delivererIDs); + } } else { for(var i in samples) { @@ -120,6 +162,9 @@ describe('Basics', function() { for(var i in samples) { compareMetadata(null, samples[i].file, META_DELIVERERS, samples[i].deliverers); } + for(var i in samples) { + compareMetadata(null, samples[i].file, META_DELIVERER_IDS, samples[i].delivererIDs); + } } }); diff --git a/test/samples.json b/test/samples.json index 2919e1e1f..13f0bb27b 100644 --- a/test/samples.json +++ b/test/samples.json @@ -9,6 +9,7 @@ , "homepage": "https://www.w3.org/WebPlatform/WG/" } ] + , "delivererIDs": [83482] } , { "url": "https://www.w3.org/TR/2016/PR-ttml-imsc1-20160308/" @@ -20,6 +21,7 @@ , "homepage": "http://www.w3.org/AudioVideo/TT/" } ] + , "delivererIDs": [34314] } , { "url": "https://www.w3.org/TR/2016/NOTE-csvw-ucr-20160225/" @@ -31,6 +33,7 @@ , "homepage": "http://www.w3.org/2013/csvw/" } ] + , "delivererIDs": [68238] } , { "url": "https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/" @@ -42,6 +45,7 @@ , "homepage": "http://www.w3.org/2013/csvw" } ] + , "delivererIDs": [68238] } , { "url": "https://www.w3.org/TR/2015/WD-tracking-compliance-20150714/" @@ -53,6 +57,7 @@ , "homepage": "http://www.w3.org/2011/tracking-protection/" } ] + , "delivererIDs": [49311] } , { "url": "https://www.w3.org/TR/2016/WD-mediacapture-depth-20160226/" @@ -68,5 +73,6 @@ , "homepage": "http://www.w3.org/2011/04/webrtc/" } ] + 
, "delivererIDs": [47318, 43696] } ] From ca706d709f5448ec0260c3734fee2a79f80cc773 Mon Sep 17 00:00:00 2001 From: tripu Date: Mon, 21 Mar 2016 15:39:27 +0900 Subject: [PATCH 17/23] Rule "group" was used solely for metadata; remove --- lib/profiles/base.js | 1 - lib/rules/heuristic/group.js | 45 ------------------------------------ 2 files changed, 46 deletions(-) delete mode 100644 lib/rules/heuristic/group.js diff --git a/lib/profiles/base.js b/lib/profiles/base.js index 17eed51e8..acb31f795 100644 --- a/lib/profiles/base.js +++ b/lib/profiles/base.js @@ -56,6 +56,5 @@ exports.rules = [ , require("../rules/validation/html") , require("../rules/validation/css") , require('../rules/validation/wcag') -, require("../rules/heuristic/group") , require('../rules/heuristic/date-format') ]; diff --git a/lib/rules/heuristic/group.js b/lib/rules/heuristic/group.js deleted file mode 100644 index fce61b850..000000000 --- a/lib/rules/heuristic/group.js +++ /dev/null @@ -1,45 +0,0 @@ - -'use strict'; - -exports.name = 'heuristic.group'; - -exports.check = function (sr, done) { - var patterns = /.+ interest group$|.+ community group$|.+ working group$/i - , candidates = [] - , candidate - , item - , i; - - sr.$('a').each(function () { - item = sr.$(this); - - if (patterns.exec(item.text())) { - candidate = {homepage: item.attr('href'), name: item.text()}; - - /* Temporarily disabled; see - https://github.com/w3c/specberus/pull/131#issuecomment-69811565 - and - https://github.com/w3c/specberus/issues/130#issuecomment-69754763 - - if (item.attr('data-deliverer-id') && /\d+/.test(item.attr('data-deliverer-id'))) { - candidate.id = item.attr('data-deliverer-id'); - } */ - - candidates.push(candidate); - } - }); - - if (candidates.length > 0) { - for (i = 0; i < candidates.length; i ++) { - sr.info(exports.name, 'candidate', {name: candidates[i].name, url: candidates[i].homepage}); - } - sr.metadata('deliverers', candidates); - } - else { - sr.error(exports.name, 'not-found'); - } - - 
return done(); - -}; - From 57b1260651e80fc3926a8cea4fe098f6e334ede6 Mon Sep 17 00:00:00 2001 From: tripu Date: Mon, 21 Mar 2016 15:41:02 +0900 Subject: [PATCH 18/23] Rule "deliverers" extended to return IDs too Some instances of "sr.metadata()" in other rules are now obsolete; remove those. --- lib/rules/headers/h2-status.js | 2 -- lib/rules/metadata/deliverers.js | 46 ++++++++++++++++++++++++-------- lib/rules/sotd/pp.js | 17 +++--------- 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/lib/rules/headers/h2-status.js b/lib/rules/headers/h2-status.js index 8d99713a0..c7e24a280 100644 --- a/lib/rules/headers/h2-status.js +++ b/lib/rules/headers/h2-status.js @@ -27,7 +27,6 @@ exports.check = function (sr, done) { var rx = new RegExp('^W3C ' + profiles.tracks[i].profiles[j].name + '( |,)', 'i'); if (rx.test(txt)) { profileFound = true; - sr.metadata('status', profiles.tracks[i].profiles[j].id); } j ++; } @@ -39,4 +38,3 @@ exports.check = function (sr, done) { done(); }; - diff --git a/lib/rules/metadata/deliverers.js b/lib/rules/metadata/deliverers.js index 3089def87..1878eb355 100644 --- a/lib/rules/metadata/deliverers.js +++ b/lib/rules/metadata/deliverers.js @@ -1,9 +1,13 @@ /** - * Pseudo-rule for metadata extraction: deliverers. + * Pseudo-rule for metadata extraction: deliverers (ID, name and home page). 
*/ // Settings: -const REGEX_GROUP = /^.*[^\s]+\s+(interest|community|working)\s+group\s*$/i; +const REGEX_GROUP = /^.*[^\s]+\s+(interest|community|working)\s+group\s*$/i +, REGEX_DELIVERER_URL = /^((https?:)?\/\/)?(www\.)?w3\.org\/2004\/01\/pp-impl\/\d+\/status(#.*)?$/i +, REGEX_DELIVERER_TEXT = /^public\s+list\s+of\s+any\s+patent\s+disclosures(\s+\(.+\))?$/i +, REGEX_DELIVERER_ID = /pp-impl\/(\d+)\/status/i +; // Internal packages: const util = require('../../util'); @@ -12,27 +16,47 @@ exports.name = 'metadata.deliverers'; exports.check = function(sr, done) { - var result = [] - , found = {} + var groups = [] + , ids = [] ; - if (sr && sr.getSotDSection() && sr.getSotDSection().filter('p')) { + if (sr && sr.getSotDSection()) { + + var item + , found = {} + ; + sr.getSotDSection().filter('p').find('a[href]').each(function() { - const item = sr.$(this); + item = sr.$(this); if (REGEX_GROUP.test(item.text())) { const name = item.text().trim() , url = item.attr('href') ; if (!found[util.normaliseURI(url)]) { found[util.normaliseURI(url)] = true; - result.push({name: name, homepage: url}); + groups.push({name: name, homepage: url}); } } }); - done({detectedDeliverers: result}); - } - else { - done(); + + found = {}; + + sr.getSotDSection().find('a[href]').each(function() { + item = sr.$(this); + var href = item.attr('href') + , text = sr.norm(item.text()) + ; + if (REGEX_DELIVERER_URL.test(href) && REGEX_DELIVERER_TEXT.test(text)) { + var id = REGEX_DELIVERER_ID.exec(href); + if (id && id.length > 1 && !found[id[1]]) { + found[id] = true; + ids.push(parseInt(id[1], 10)); + } + } + }); + } + done({detectedDeliverers: groups, detectedDelivererIDs: ids}); + }; diff --git a/lib/rules/sotd/pp.js b/lib/rules/sotd/pp.js index 84733ef86..75c43afd1 100644 --- a/lib/rules/sotd/pp.js +++ b/lib/rules/sotd/pp.js @@ -69,9 +69,6 @@ function findPP ($candidates, sr) { exports.name = "sotd.pp"; exports.check = function (sr, done) { - // Pseudo-constants: - var DELIVERER_ID_REGEX = 
/pp-impl\/(\d+)\/status/; - var $sotd = sr.getSotDSection(); if (!$sotd || !$sotd.length) { sr.error(exports.name, "no-sotd"); @@ -93,7 +90,6 @@ exports.check = function (sr, done) { var $a = sr.$(this) , href = $a.attr("href") , text = sr.norm($a.text()) - , ids ; if ((href === "http://www.w3.org/Consortium/Patent-Policy-20040205/" || href === "https://www.w3.org/Consortium/Patent-Policy-20040205/") && @@ -102,15 +98,10 @@ exports.check = function (sr, done) { return; } if (/^https?:\/\/www\.w3\.org\/2004\/01\/pp-impl\/\d+\/status(#.*)?$/.test(href) && - /public list of any patent disclosures( \(.+\))?/.test(text)) { - ids = DELIVERER_ID_REGEX.exec(href); - if (ids && 2 === ids.length) { - sr.metadata('delivererIDs', parseInt(ids[1], 10)); - } - if ($a.attr("rel") === "disclosure") { - foundPublicList = true; - return; - } + /public list of any patent disclosures( \(.+\))?/.test(text) && + $a.attr("rel") === "disclosure") { + foundPublicList = true; + return; } if ((href === "http://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential" || href === "https://www.w3.org/Consortium/Patent-Policy-20040205/#def-essential") && From bb3c2ad1a53e07fc4838df4c622036e71878753b Mon Sep 17 00:00:00 2001 From: tripu Date: Mon, 21 Mar 2016 15:42:16 +0900 Subject: [PATCH 19/23] Update documentation about the JS API and methods --- README.md | 56 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index c869a570e..c3e9acc62 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,9 @@ The interface you get when you `require("specberus")` is that from `lib/validato `Specberus` instance that is properly configured for operation in the Node.js environment (there is nominal support for running Specberus under other environments, but it isn't usable at this time). 
-The validator interface supports a `validate(options)` methods, which takes an object with the -following fields: +### `validate(options)` + +This method takes an object with the following fields: * `url`: URL of the content to check. One of `url`, `source`, `file`, or `document` must be specified and if several are they will be used in this order. @@ -75,6 +76,34 @@ following fields: * `events`: An event sink which supports the same interface as Node.js's `EventEmitter`. Required. See below for the events that get generated. +### `extractMetadata(options)` + +This method returns a simple object with metadata inferred from the document. +The `options` accepted are equal to those in `validate()`, except that a `profile` is not necessary and will be ignored (to find out the profile is one of the +goals of this method). + +The returned `Object` may contain up to 3 properties: `detectedProfile`, `detectedDeliverers` and `detectedDelivererIDs`. +If some of these pieces of metadata cannot be deduced, that key will not exist, or its value will not be defined. + +An example: + +``json +{ + "detectedProfile": "WD", + "detectedDeliverers": [ + { + "name": "Device APIs Working Group", + "homepage":"http://www.w3.org/2009/dap/" + }, + { + "name": "Web Real-Time Communications Working Group", + "homepage": "http://www.w3.org/2011/04/webrtc/" + } + ], + "detectedDelivererIDs": [47318, 43696] +} +``` + ### Emitting metadata about the document Every time the validator finds/deduces a piece of metadata about the document, it emits a `metadata` event. @@ -86,17 +115,12 @@ These properties are now returned when found: * `docDate`: The date associated to the document. * `title`: The (possible) title of the document. * `process`: The process rules, **as they appear on the text of the document**, eg `'1 September 2015'`. 
-* `deliverers`: The deliverer(s) responsible for the document (WGs, TFs, etc); an `Array` of `Object`s, each one with these properties: - * `homepage`: URL of the group's home page. - * `name`: name of the group, exactly as it is found in the hyperlink on the document. -* `delivererIDs` ID(s) of the deliverer(s); an `Array` of `Number`s. * `thisVersion`: URL of this version of the document. * `previousVersion`: URL of the previous version of the document (the last one, if multiple are shown). * `latestVersion`: URL of the latest version of the document. * `editorIDs`: ID(s) of the editor(s) responsible for the document; an `Array` of `Number`s. * `editorsDraft`: URL of the latest editor's draft. * `shortname`: shortname extracted from latestVersion in the document; a `String`. -* `status`: ID (acronym) of the profile detected in the document; a `String`. See file `public/data/profiles.json`. As an example, validating [`http://www.w3.org/TR/2014/REC-exi-profile-20140909/`](http://www.w3.org/TR/2014/REC-exi-profile-20140909/) (REC) emits these pairs of metadata: @@ -108,13 +132,8 @@ emits these pairs of metadata: { latestVersion: 'http://www.w3.org/TR/exi-profile/' } { previousVersion: 'http://www.w3.org/TR/2014/PR-exi-profile-20140506/' } { editorIDs: [] } -{ status: 'REC' } { shortname: 'exi-profile'} { process: '1 September 2015' } -{ deliverers: [ - { homepage: 'http://www.w3.org/XML/EXI/', - name: 'Efficient XML Interchange Working Group' } - ] } ``` If you download that very spec, edit it to include the following metadata… @@ -133,13 +152,8 @@ If you download that very spec, edit it to include the following metadata&hellip { latestVersion: 'http://www.w3.org/TR/exi-profile/' } { previousVersion: 'http://www.w3.org/TR/2014/PR-exi-profile-20140506/' } { editorIDs: [ '329883', '387297' ] } -{ status: 'REC' } { shortname: 'exi-profile'} { process: '1 September 2015' } -{ deliverers: [ - { homepage: 'http://www.w3.org/XML/EXI/', - name: 'Efficient XML Interchange 
Working Group' } - ] } ``` Another example: when applied to [`http://www.w3.org/TR/wai-aria-1.1/`](http://www.w3.org/TR/wai-aria-1.1/) (WD), @@ -152,16 +166,9 @@ the following metadata will be found: { latestVersion: 'http://www.w3.org/TR/wai-aria-1.1/' } { previousVersion: 'http://www.w3.org/TR/2014/WD-wai-aria-1.1-20140612/' } { editorIDs: [] } -{ status: 'WD' } { shortname: 'wai-aria-1.1' } { process: '1 September 2015' } { editorsDraft: 'http://w3c.github.io/aria/aria/aria.html' } -{ deliverers: [ - { homepage: 'http://www.w3.org/WAI/PF/', - name: 'Protocols & Formats Working Group' }, - { homepage: 'http://www.w3.org/html/wg/', - name: 'HTML Working Group' } - ] } ``` ## Profiles @@ -243,4 +250,3 @@ The Specberus object exposes the following API that's useful for validation: * `getDocumentDate()`. Returns a Date object that matches the document's date as specified in the headers' h2. * `getDocumentDateElement()`. Returns the element that contains the document's date. - From 70d00d207975622e011a1d4d3ae4d5ef725516e4 Mon Sep 17 00:00:00 2001 From: tripu Date: Mon, 21 Mar 2016 15:47:03 +0900 Subject: [PATCH 20/23] Fix a couple of typos in documentation --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c3e9acc62..fbd7ad33d 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ This method takes an object with the following fields: ### `extractMetadata(options)` This method returns a simple object with metadata inferred from the document. -The `options` accepted are equal to those in `validate()`, except that a `profile` is not necessary and will be ignored (to find out the profile is one of the +The `options` accepted are equal to those in `validate()`, except that a `profile` is not necessary and will be ignored (finding out the profile is one of the goals of this method). The returned `Object` may contain up to 3 properties: `detectedProfile`, `detectedDeliverers` and `detectedDelivererIDs`. 
@@ -87,7 +87,7 @@ If some of these pieces of metadata cannot be deduced, that key will not exist, An example: -``json +```json { "detectedProfile": "WD", "detectedDeliverers": [ From d98944dce4a58b4e0202133aec206cc8cedea0e3 Mon Sep 17 00:00:00 2001 From: tripu Date: Mon, 21 Mar 2016 18:55:38 +0900 Subject: [PATCH 21/23] Delete comment left out from debugging --- lib/validator.js | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/validator.js b/lib/validator.js index c5043a1d7..ef44e6b3a 100644 --- a/lib/validator.js +++ b/lib/validator.js @@ -56,7 +56,6 @@ Specberus.prototype.extractMetadata = function (options) { profileMetadata.rules.forEach(function (rule) { rule.check(self, function (result) { if (result) { - // console.dir(result); for (var i in result) { self.meta[i] = result[i]; } From d504d847a96367dc3001cbaa21e5978eda33dc2c Mon Sep 17 00:00:00 2001 From: tripu Date: Mon, 21 Mar 2016 19:29:15 +0900 Subject: [PATCH 22/23] Remove "detectedDeliverers" from extracted metadata --- README.md | 12 +------- lib/rules/metadata/deliverers.js | 42 +++++++------------------- lib/util.js | 30 ------------------- test/rules.js | 51 +------------------------------- 4 files changed, 12 insertions(+), 123 deletions(-) delete mode 100644 lib/util.js diff --git a/README.md b/README.md index fbd7ad33d..41dfe27d9 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ This method returns a simple object with metadata inferred from the document. The `options` accepted are equal to those in `validate()`, except that a `profile` is not necessary and will be ignored (finding out the profile is one of the goals of this method). -The returned `Object` may contain up to 3 properties: `detectedProfile`, `detectedDeliverers` and `detectedDelivererIDs`. +The returned `Object` may contain up to 2 properties: `detectedProfile` and `detectedDelivererIDs`. If some of these pieces of metadata cannot be deduced, that key will not exist, or its value will not be defined. 
An example: @@ -90,16 +90,6 @@ An example: ```json { "detectedProfile": "WD", - "detectedDeliverers": [ - { - "name": "Device APIs Working Group", - "homepage":"http://www.w3.org/2009/dap/" - }, - { - "name": "Web Real-Time Communications Working Group", - "homepage": "http://www.w3.org/2011/04/webrtc/" - } - ], "detectedDelivererIDs": [47318, 43696] } ``` diff --git a/lib/rules/metadata/deliverers.js b/lib/rules/metadata/deliverers.js index 1878eb355..e7381b602 100644 --- a/lib/rules/metadata/deliverers.js +++ b/lib/rules/metadata/deliverers.js @@ -1,51 +1,29 @@ /** - * Pseudo-rule for metadata extraction: deliverers (ID, name and home page). + * Pseudo-rule for metadata extraction: deliverers' IDs. */ // Settings: -const REGEX_GROUP = /^.*[^\s]+\s+(interest|community|working)\s+group\s*$/i -, REGEX_DELIVERER_URL = /^((https?:)?\/\/)?(www\.)?w3\.org\/2004\/01\/pp-impl\/\d+\/status(#.*)?$/i +const REGEX_DELIVERER_URL = /^((https?:)?\/\/)?(www\.)?w3\.org\/2004\/01\/pp-impl\/\d+\/status(#.*)?$/i , REGEX_DELIVERER_TEXT = /^public\s+list\s+of\s+any\s+patent\s+disclosures(\s+\(.+\))?$/i , REGEX_DELIVERER_ID = /pp-impl\/(\d+)\/status/i ; -// Internal packages: -const util = require('../../util'); - exports.name = 'metadata.deliverers'; exports.check = function(sr, done) { - var groups = [] - , ids = [] - ; - - if (sr && sr.getSotDSection()) { - - var item - , found = {} - ; - - sr.getSotDSection().filter('p').find('a[href]').each(function() { - item = sr.$(this); - if (REGEX_GROUP.test(item.text())) { - const name = item.text().trim() - , url = item.attr('href') - ; - if (!found[util.normaliseURI(url)]) { - found[util.normaliseURI(url)] = true; - groups.push({name: name, homepage: url}); - } - } - }); + var ids = []; - found = {}; + if (sr && sr.getSotDSection() && sr.getSotDSection().find('a[href]')) { sr.getSotDSection().find('a[href]').each(function() { - item = sr.$(this); - var href = item.attr('href') + + var item = sr.$(this) + , href = item.attr('href') , text = 
sr.norm(item.text()) + , found = {} ; + if (REGEX_DELIVERER_URL.test(href) && REGEX_DELIVERER_TEXT.test(text)) { var id = REGEX_DELIVERER_ID.exec(href); if (id && id.length > 1 && !found[id[1]]) { @@ -57,6 +35,6 @@ exports.check = function(sr, done) { } - done({detectedDeliverers: groups, detectedDelivererIDs: ids}); + done({detectedDelivererIDs: ids}); }; diff --git a/lib/util.js b/lib/util.js deleted file mode 100644 index e12341c0d..000000000 --- a/lib/util.js +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Miscellaneous utilities, mostly String-related routines. - */ - -const REGEX_URI = /https?:\/\/(www\.)?((.+)[^\ \/])\/?$/i; - -/** - * Reduce a URI to its minimum expression, for easier comparison. - * - * This works heuristically; it strips a URI of the usual variants and converts it to lowercase - * ("www." at the beginning, "/" at the end) - * - * @param {String} uri - Original URI. - * @returns {String} The "normalised", (probably) equivalent URI. - */ - -const normaliseURI = function(uri) { - - var result = uri.trim().toLowerCase(); - const matches = REGEX_URI.exec(result); - - if (matches && matches.length > 2) { - result = matches[2]; - } - - return result; - -}; - -exports.normaliseURI = normaliseURI; diff --git a/test/rules.js b/test/rules.js index 206c978ac..c1a8ad37e 100644 --- a/test/rules.js +++ b/test/rules.js @@ -5,7 +5,6 @@ // Settings: const DEBUG = false , META_PROFILE = 'profile' -, META_DELIVERERS = 'deliverers' , META_DELIVERER_IDS = 'delivererIDs' ; @@ -24,36 +23,6 @@ const validation = require('./validation') , sink = require('../lib/sink') ; -/** - * Compare two arrays of "deliverers" and check that they're equivalent. - * - * @param {Array} a1 - One array. - * @param {Array} a2 - The other array. - * @returns {Boolean} whether the two structures are really the same. 
- */ - -const equivalentDeliverers = function(a1, a2) { - if (a1 && a2 && a1.length === a2.length) { - var j - , found = 0; - for(var i = 0; i < a1.length; i ++) { - j = 0; - while (i === found && j < a2.length) { - if (a1[i].name === a2[j].name && a1[i].homepage === a2[j].homepage) { - found++; - } - else { - j++; - } - } - } - return (found === a1.length); - } - else { - return false; - } -}; - /** * Compare two arrays of "deliverer IDs" and check that they're equivalent. * @@ -84,7 +53,7 @@ const equivalentDelivererIDs = function(a1, a2) { * * @param {String} url - public URL of a spec. * @param {String} file - name of local file containing a spec (without path and withouth ".html" suffix). - * @param {String} type - metadata to check: {"META_PROFILE", "META_DELIVERERS"}. + * @param {String} type - metadata to check: {"META_PROFILE", "META_DELIVERER_IDS"}. * @param {Object} expectedValue - value that is expected to be found. */ @@ -105,18 +74,6 @@ const compareMetadata = function(url, file, type, expectedValue) { specberus.extractMetadata(opts); }); } - else if (META_DELIVERERS === type) { - it('Should find deliverers of ' + (url ? url : file), function (done) { - handler.on('end-all', function () { - chai(specberus).to.have.property('meta').to.have.property('detectedDeliverers'); - chai(specberus.meta.detectedDeliverers).to.satisfy(function(found) { - return equivalentDeliverers(found, expectedValue); - }); - done(); - }); - specberus.extractMetadata(opts); - }); - } else if (META_DELIVERER_IDS === type) { it('Should find deliverer IDs of ' + (url ? 
url : file), function (done) { handler.on('end-all', function () { @@ -148,9 +105,6 @@ describe('Basics', function() { for(var i in samples) { compareMetadata(samples[i].url, null, META_PROFILE, samples[i].profile); } - for(var i in samples) { - compareMetadata(samples[i].url, null, META_DELIVERERS, samples[i].deliverers); - } for(var i in samples) { compareMetadata(samples[i].url, null, META_DELIVERER_IDS, samples[i].delivererIDs); } @@ -159,9 +113,6 @@ describe('Basics', function() { for(var i in samples) { compareMetadata(null, samples[i].file, META_PROFILE, samples[i].profile); } - for(var i in samples) { - compareMetadata(null, samples[i].file, META_DELIVERERS, samples[i].deliverers); - } for(var i in samples) { compareMetadata(null, samples[i].file, META_DELIVERER_IDS, samples[i].delivererIDs); } From ca80fbb84011141e97ac3f6d0cb2fc5762f16eb5 Mon Sep 17 00:00:00 2001 From: tripu Date: Tue, 22 Mar 2016 13:15:40 +0900 Subject: [PATCH 23/23] "Better remove 'detected' in the properties name." --- README.md | 6 +++--- lib/rules/metadata/deliverers.js | 2 +- lib/rules/metadata/profile.js | 2 +- test/rules.js | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 41dfe27d9..db081fa01 100644 --- a/README.md +++ b/README.md @@ -82,15 +82,15 @@ This method returns a simple object with metadata inferred from the document. The `options` accepted are equal to those in `validate()`, except that a `profile` is not necessary and will be ignored (finding out the profile is one of the goals of this method). -The returned `Object` may contain up to 2 properties: `detectedProfile` and `detectedDelivererIDs`. +The returned `Object` may contain up to 2 properties: `profile` and `delivererIDs`. If some of these pieces of metadata cannot be deduced, that key will not exist, or its value will not be defined. 
An example: ```json { - "detectedProfile": "WD", - "detectedDelivererIDs": [47318, 43696] + "profile": "WD", + "delivererIDs": [47318, 43696] } ``` diff --git a/lib/rules/metadata/deliverers.js b/lib/rules/metadata/deliverers.js index e7381b602..bbf264a4a 100644 --- a/lib/rules/metadata/deliverers.js +++ b/lib/rules/metadata/deliverers.js @@ -35,6 +35,6 @@ exports.check = function(sr, done) { } - done({detectedDelivererIDs: ids}); + done({delivererIDs: ids}); }; diff --git a/lib/rules/metadata/profile.js b/lib/rules/metadata/profile.js index 9e7d043de..02d583cad 100644 --- a/lib/rules/metadata/profile.js +++ b/lib/rules/metadata/profile.js @@ -39,7 +39,7 @@ exports.check = function(sr, done) { } }); if (id) { - done({detectedProfile: id}); + done({profile: id}); } else { done(); diff --git a/test/rules.js b/test/rules.js index c1a8ad37e..9bc7aa2ce 100644 --- a/test/rules.js +++ b/test/rules.js @@ -68,7 +68,7 @@ const compareMetadata = function(url, file, type, expectedValue) { if (META_PROFILE === type) { it('Should detect a ' + expectedValue, function (done) { handler.on('end-all', function () { - chai(specberus).to.have.property('meta').to.have.property('detectedProfile').equal(expectedValue); + chai(specberus).to.have.property('meta').to.have.property('profile').equal(expectedValue); done(); }); specberus.extractMetadata(opts); @@ -77,8 +77,8 @@ const compareMetadata = function(url, file, type, expectedValue) { else if (META_DELIVERER_IDS === type) { it('Should find deliverer IDs of ' + (url ? url : file), function (done) { handler.on('end-all', function () { - chai(specberus).to.have.property('meta').to.have.property('detectedDelivererIDs'); - chai(specberus.meta.detectedDelivererIDs).to.satisfy(function(found) { + chai(specberus).to.have.property('meta').to.have.property('delivererIDs'); + chai(specberus.meta.delivererIDs).to.satisfy(function(found) { return equivalentDelivererIDs(found, expectedValue); }); done();