textwrap.js
17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
// Partial port of python's textwrap module, version 3.9.0 (only wrap, fill and dedent functions):
// https://github.com/python/cpython/blob/v3.9.0b4/Lib/textwrap.py
'use strict'
/*
* Text wrapping and filling.
*/
// Copyright (C) 1999-2001 Gregory P. Ward.
// Copyright (C) 2002, 2003 Python Software Foundation.
// Copyright (C) 2020 argparse.js authors
// Originally written by Greg Ward <gward@python.net>
// Hardcode the recognized whitespace characters to the US-ASCII
// whitespace characters. The main reason for doing this is that
// some Unicode spaces (like \u00a0) are non-breaking whitespaces.
//
// This less funky little regex just splits on recognized spaces. E.g.
// "Hello there -- you goof-ball, use the -b option!"
// splits into
// Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
const wordsep_simple_re = /([\t\n\x0b\x0c\r ]+)/

class TextWrapper {
    /*
     * Object for wrapping/filling text. The public interface consists of
     * the wrap() and fill() methods; the other methods are just there for
     * subclasses to override in order to tweak the default behaviour.
     * If you want to completely replace the main wrapping algorithm,
     * you'll probably have to override _wrap_chunks().
     *
     * Several instance attributes control various aspects of wrapping:
     *   width (default: 70)
     *     the maximum width of wrapped lines (unless break_long_words
     *     is false)
     *   initial_indent (default: "")
     *     string that will be prepended to the first line of wrapped
     *     output. Counts towards the line's width.
     *   subsequent_indent (default: "")
     *     string that will be prepended to all lines save the first
     *     of wrapped output; also counts towards each line's width.
     *   expand_tabs (default: true)
     *     Expand tabs in input text to spaces before further processing.
     *     Each tab advances to the next multiple of 'tabsize', so it
     *     becomes up to 'tabsize' spaces depending on its position in
     *     its line. If false, each tab is treated as a single character.
     *   tabsize (default: 8)
     *     Distance between tab stops used when 'expand_tabs' is true.
     *   replace_whitespace (default: true)
     *     Replace all whitespace characters in the input text by spaces
     *     after tab expansion. Note that if expand_tabs is false and
     *     replace_whitespace is true, every tab will be converted to a
     *     single space!
     *   fix_sentence_endings (default: false)
     *     Stored on the instance, but not implemented in this port.
     *   break_long_words (default: true)
     *     Break words longer than 'width'. If false, those words will not
     *     be broken, and some lines might be longer than 'width'.
     *   break_on_hyphens (default: true)
     *     Stored on the instance, but _split() in this port only splits
     *     on whitespace, so the option currently has no effect.
     *   drop_whitespace (default: true)
     *     Drop leading and trailing whitespace from lines.
     *   max_lines (default: undefined)
     *     Truncate wrapped output to this many lines. undefined or null
     *     means "no limit".
     *   placeholder (default: ' [...]')
     *     Append to the last line of truncated text.
     */
    constructor(options = {}) {
        const {
            width = 70,
            initial_indent = '',
            subsequent_indent = '',
            expand_tabs = true,
            replace_whitespace = true,
            fix_sentence_endings = false,
            break_long_words = true,
            drop_whitespace = true,
            break_on_hyphens = true,
            tabsize = 8,
            max_lines = undefined,
            placeholder = ' [...]'
        } = options

        this.width = width
        this.initial_indent = initial_indent
        this.subsequent_indent = subsequent_indent
        this.expand_tabs = expand_tabs
        this.replace_whitespace = replace_whitespace
        this.fix_sentence_endings = fix_sentence_endings
        this.break_long_words = break_long_words
        this.drop_whitespace = drop_whitespace
        this.break_on_hyphens = break_on_hyphens
        this.tabsize = tabsize
        this.max_lines = max_lines
        this.placeholder = placeholder
    }

    // -- Private methods -----------------------------------------------
    // (possibly useful for subclasses to override)

    _expand_tabs(text) {
        /*
         * _expand_tabs(text : string) -> string
         *
         * Replace every tab with the number of spaces needed to reach the
         * next multiple of 'tabsize'. The column counter restarts after
         * each "\n" or "\r". A 'tabsize' of zero or less removes tabs.
         * Columns are counted in UTF-16 code units, like every other
         * length computed in this class.
         */
        const tabsize = this.tabsize
        const pieces = []
        let column = 0
        for (let i = 0; i < text.length; i++) {
            const ch = text[i]
            if (ch === '\t') {
                if (tabsize > 0) {
                    const pad = tabsize - (column % tabsize)
                    pieces.push(' '.repeat(pad))
                    column += pad
                }
            } else {
                pieces.push(ch)
                column = (ch === '\n' || ch === '\r') ? 0 : column + 1
            }
        }
        return pieces.join('')
    }

    _munge_whitespace(text) {
        /*
         * _munge_whitespace(text : string) -> string
         *
         * Munge whitespace in text: expand tabs (if 'expand_tabs') and
         * then convert every remaining tab, newline, vertical tab, form
         * feed and carriage return to a single space (if
         * 'replace_whitespace').
         */
        let munged = text
        if (this.expand_tabs) {
            munged = this._expand_tabs(munged)
        }
        if (this.replace_whitespace) {
            munged = munged.replace(/[\t\n\x0b\x0c\r]/g, ' ')
        }
        return munged
    }

    _split(text) {
        /*
         * _split(text : string) -> [string]
         *
         * Split the text to wrap into indivisible chunks. Each chunk is
         * either a run of whitespace or a run of non-whitespace. For
         * example, the text
         *   Look, goof-ball -- use the -b option!
         * breaks into the following chunks:
         *   'Look,', ' ', 'goof-ball', ' ', '--', ' ',
         *   'use', ' ', 'the', ' ', '-b', ' ', 'option!'
         * Hyphenated words are never split here, whatever the value of
         * 'break_on_hyphens'.
         */
        // The capture group keeps the separators; filter drops the empty
        // strings split() yields at the edges of the text.
        return text.split(wordsep_simple_re).filter(Boolean)
    }

    _handle_long_word(reversed_chunks, cur_line, cur_len, width) {
        /*
         * _handle_long_word(chunks : [string],
         *                   cur_line : [string],
         *                   cur_len : int, width : int)
         *
         * Handle a chunk of text (most likely a word, not whitespace) that
         * is too long to fit in any line. Both 'reversed_chunks' (a stack
         * whose top is the last element) and 'cur_line' are modified in
         * place; nothing is returned.
         */
        // When the indent is at least as wide as the requested width there
        // is no room at all; still strip one character per pass so that
        // the caller's loop makes progress.
        const space_left = width < 1 ? 1 : width - cur_len
        const top = reversed_chunks.length - 1

        if (this.break_long_words) {
            // Put as much of the next chunk onto the current line as
            // will fit and leave the remainder on the stack.
            const word = reversed_chunks[top]
            cur_line.push(word.slice(0, space_left))
            reversed_chunks[top] = word.slice(space_left)
        } else if (cur_line.length === 0) {
            // The long word must stay intact. Only add it when the line is
            // still empty -- that minimizes how much we violate the width
            // constraint.
            cur_line.push(reversed_chunks.pop())
        }
        // Otherwise do nothing: the caller finishes the current line and
        // comes back here with an empty line, which then takes the word.
    }

    _wrap_chunks(chunks) {
        /*
         * _wrap_chunks(chunks : [string]) -> [string]
         *
         * Wrap a sequence of text chunks and return a list of lines of
         * length 'this.width' or less. (If 'break_long_words' is false,
         * some lines may be longer than this.) Chunks correspond roughly
         * to words and the whitespace between them: each chunk is
         * indivisible (modulo 'break_long_words'), but a line break can
         * come between any two chunks. Chunks should not have internal
         * whitespace; ie. a chunk is either all whitespace or a "word".
         * Whitespace chunks will be removed from the beginning and end of
         * lines, but apart from that whitespace is preserved.
         *
         * The 'chunks' array passed in is not modified.
         *
         * Throws Error when 'width' is not positive, or when 'max_lines'
         * is set and the placeholder cannot fit next to the indent.
         */
        const lines = []

        if (this.width <= 0) {
            throw new Error(`invalid width ${this.width} (must be > 0)`)
        }

        // undefined and null both mean "no limit on the number of lines".
        const limited = this.max_lines != null
        if (limited) {
            const last_indent = this.max_lines > 1 ? this.subsequent_indent : this.initial_indent
            if (last_indent.length + this.placeholder.trimStart().length > this.width) {
                throw new Error('placeholder too large for max width')
            }
        }

        // Work on a reversed copy so items can be efficiently popped
        // from a stack of chunks.
        const stack = [...chunks].reverse()

        while (stack.length > 0) {
            // Chunks that will make up the current line; cur_len is the
            // total length of everything in cur_line.
            const cur_line = []
            let cur_len = 0

            // Figure out which static string will prefix this line.
            const indent = lines.length > 0 ? this.subsequent_indent : this.initial_indent

            // Maximum width for this line.
            const width = this.width - indent.length

            // First chunk on line is whitespace -- drop it, unless this
            // is the very beginning of the text (ie. no lines started yet).
            if (this.drop_whitespace && lines.length > 0 &&
                    stack[stack.length - 1].trim() === '') {
                stack.pop()
            }

            // Squeeze as many whole chunks onto the line as will fit.
            while (stack.length > 0) {
                const chunk_len = stack[stack.length - 1].length
                if (cur_len + chunk_len > width) {
                    break
                }
                cur_line.push(stack.pop())
                cur_len += chunk_len
            }

            // The current line is full, and the next chunk is too big to
            // fit on *any* line (not just this one).
            if (stack.length > 0 && stack[stack.length - 1].length > width) {
                this._handle_long_word(stack, cur_line, cur_len, width)
                cur_len = cur_line.reduce((total, chunk) => total + chunk.length, 0)
            }

            // If the last chunk on this line is all whitespace, drop it.
            if (this.drop_whitespace && cur_line.length > 0 &&
                    cur_line[cur_line.length - 1].trim() === '') {
                cur_len -= cur_line[cur_line.length - 1].length
                cur_line.pop()
            }

            // Nothing but dropped whitespace on this pass: emit no line.
            if (cur_line.length === 0) {
                continue
            }

            // True when all remaining input is at most one whitespace
            // chunk that would be dropped anyway.
            const nothing_left = stack.length === 0 ||
                (this.drop_whitespace && stack.length === 1 && !stack[0].trim())

            if (!limited ||
                    lines.length + 1 < this.max_lines ||
                    (nothing_left && cur_len <= width)) {
                // Convert current line back to a string and store it in
                // list of all lines (return value).
                lines.push(indent + cur_line.join(''))
                continue
            }

            // This is the last permitted line and text remains: strip
            // chunks from the end until the placeholder fits after a word.
            let placed = false
            while (cur_line.length > 0) {
                const tail = cur_line[cur_line.length - 1]
                if (tail.trim() && cur_len + this.placeholder.length <= width) {
                    cur_line.push(this.placeholder)
                    lines.push(indent + cur_line.join(''))
                    placed = true
                    break
                }
                cur_len -= tail.length
                cur_line.pop()
            }

            if (!placed) {
                // Nothing on this line can carry the placeholder; try to
                // append it to the previous line instead.
                if (lines.length > 0) {
                    const prev_line = lines[lines.length - 1].trimEnd()
                    if (prev_line.length + this.placeholder.length <= this.width) {
                        lines[lines.length - 1] = prev_line + this.placeholder
                        break
                    }
                }
                lines.push(indent + this.placeholder.trimStart())
            }
            break
        }

        return lines
    }

    _split_chunks(text) {
        /*
         * _split_chunks(text : string) -> [string]
         *
         * Normalize whitespace with _munge_whitespace() and split the
         * result into chunks with _split().
         */
        return this._split(this._munge_whitespace(text))
    }

    // -- Public interface ----------------------------------------------

    wrap(text) {
        /*
         * wrap(text : string) -> [string]
         *
         * Reformat the single paragraph in 'text' so it fits in lines of
         * no more than 'this.width' columns, and return a list of wrapped
         * lines. With the default options tabs are expanded and all other
         * whitespace characters (including newline) are converted to
         * space. 'fix_sentence_endings' is not implemented in this port.
         */
        const chunks = this._split_chunks(text)
        return this._wrap_chunks(chunks)
    }

    fill(text) {
        /*
         * fill(text : string) -> string
         *
         * Reformat the single paragraph in 'text' to fit in lines of no
         * more than 'this.width' columns, and return a new string
         * containing the entire wrapped paragraph, joined with "\n".
         */
        return this.wrap(text).join('\n')
    }
}
// -- Convenience interface ---------------------------------------------
function wrap(text, options = {}) {
    /*
     * Wrap a single paragraph of text and return the wrapped lines as an
     * array of strings.
     *
     * 'options' is forwarded to the TextWrapper constructor; 'width'
     * falls back to 70 when it is missing or undefined. See the
     * TextWrapper class for the available options.
     */
    const { width = 70, ...rest } = options
    return new TextWrapper({ width, ...rest }).wrap(text)
}
function fill(text, options = {}) {
    /*
     * Fill a single paragraph of text and return it as one string.
     *
     * 'options' is forwarded to the TextWrapper constructor; 'width'
     * falls back to 70 when it is missing or undefined. The result is
     * whatever TextWrapper.fill() returns for 'text'. See the
     * TextWrapper class for the available options.
     */
    const { width = 70, ...rest } = options
    return new TextWrapper({ width, ...rest }).fill(text)
}
// -- Loosely related functionality -------------------------------------
const _whitespace_only_re = /^[ \t]+$/mg
const _leading_whitespace_re = /(^[ \t]*)(?:[^ \t\n])/mg

function dedent(text) {
    /*
     * Strip the leading whitespace shared by every non-blank line of
     * `text` and return the result.
     *
     * Lines made up solely of spaces and tabs are emptied first, so they
     * do not take part in computing the shared margin. Spaces and tabs
     * are compared literally: a line indented with spaces and a line
     * indented with a tab share no margin.
     */
    const cleaned = text.replace(_whitespace_only_re, '')

    // Every match is a line's indentation plus the first character after
    // it; that final character is cut off below.
    const hits = cleaned.match(_leading_whitespace_re) || []

    let margin
    for (const hit of hits) {
        const lead = hit.slice(0, -1)

        // First indented line seen, or one that is no deeper than (and
        // consistent with) the current margin: it becomes the margin.
        if (margin === undefined || margin.startsWith(lead)) {
            margin = lead
            continue
        }

        // Deeper than, but consistent with, the current margin: keep it.
        if (lead.startsWith(margin)) {
            continue
        }

        // Inconsistent indentation: cut the margin back to the prefix
        // both strings have in common.
        const limit = Math.min(margin.length, lead.length)
        let shared = 0
        while (shared < limit && margin[shared] === lead[shared]) {
            shared += 1
        }
        if (shared < limit) {
            margin = margin.slice(0, shared)
        }
    }

    if (!margin) {
        return cleaned
    }
    return cleaned.replace(new RegExp('^' + margin, 'mg'), '')
}
module.exports = { wrap, fill, dedent }