circularize.js
1.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/**
* The recognition result of tesseract contains a deeply
* nested JSON object that holds the details.
*
* The result of dump.js is a big JSON tree
* which can be easily serialized (for instance
* to be sent from a webworker to the main app
* or through Node's IPC), but we want
* a (circular) DOM-like interface for walking
* through the data.
*
* @fileoverview DOM-like interface for walking through data
* @author Kevin Kwok <antimatter15@gmail.com>
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
module.exports = (page) => {
const blocks = [];
const paragraphs = [];
const lines = [];
const words = [];
const symbols = [];
page.blocks.forEach((block) => {
block.paragraphs.forEach((paragraph) => {
paragraph.lines.forEach((line) => {
line.words.forEach((word) => {
word.symbols.forEach((sym) => {
symbols.push({
...sym, page, block, paragraph, line, word,
});
});
words.push({
...word, page, block, paragraph, line,
});
});
lines.push({
...line, page, block, paragraph,
});
});
paragraphs.push({
...paragraph, page, block,
});
});
blocks.push({
...block, page,
});
});
return {
...page, blocks, paragraphs, lines, words, symbols,
};
};