| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274 | 
							- "use strict";
 
- // Simulations show these probabilities for a single change
 
- // 93.1% that one group is invalidated
 
- // 4.8% that two groups are invalidated
 
- // 1.1% that 3 groups are invalidated
 
- // 0.1% that 4 or more groups are invalidated
 
- //
 
- // And these for removing/adding 10 lexically adjacent files
 
- // 64.5% that one group is invalidated
 
- // 24.8% that two groups are invalidated
 
- // 7.8% that 3 groups are invalidated
 
- // 2.7% that 4 or more groups are invalidated
 
- //
 
- // And these for removing/adding 3 random files
 
- // 0% that one group is invalidated
 
- // 3.7% that two groups are invalidated
 
- // 80.8% that 3 groups are invalidated
 
- // 12.3% that 4 groups are invalidated
 
- // 3.2% that 5 or more groups are invalidated
 
/**
 * Scores how alike two keys are by comparing them position by position
 * over the length of the shorter key.
 *
 * Every compared position contributes between 0 and 10 points: 10 when both
 * UTF-16 code units are equal, one point less per unit of distance between
 * them, and nothing once they are 10 or more apart.
 *
 * @param {string} a key
 * @param {string} b key
 * @returns {number} the similarity as number (a higher value means more similar)
 */
const similarity = (a, b) => {
	const compared = Math.min(a.length, b.length);
	let score = 0;
	let index = 0;
	while (index < compared) {
		const delta = Math.abs(a.charCodeAt(index) - b.charCodeAt(index));
		// positions that are 10 or more code units apart add nothing
		if (delta < 10) score += 10 - delta;
		index += 1;
	}
	return score;
};
 
/**
 * Derives a name from two keys: the prefix both keys share, followed by the
 * character of `a` at the first position where the keys differ.
 *
 * When the keys do not differ within the length of the shorter one,
 * `a` is returned unchanged.
 *
 * @param {string} a key
 * @param {string} b key
 * @returns {string} the common part and a single char for the difference
 */
const getName = (a, b) => {
	const limit = Math.min(a.length, b.length);
	let index = 0;
	// advance past the shared prefix
	while (index < limit && a.charAt(index) === b.charAt(index)) {
		index++;
	}
	// no difference found inside the overlapping part
	if (index === limit) return a;
	// shared prefix plus the first differing character of `a`
	return a.slice(0, index + 1);
};
 
/**
 * A single item together with the key and size computed for it.
 *
 * @template T
 */
class Node {
	/**
	 * @param {T} item the wrapped item
	 * @param {string} key key of the item
	 * @param {number} size size of the item
	 */
	constructor(item, key, size) {
		/** @type {T} */
		this.item = item;
		/** @type {string} */
		this.key = key;
		/** @type {number} */
		this.size = size;
	}
}
 
/**
 * A run of nodes together with the similarities between neighbouring nodes
 * and the summed size of all nodes.
 *
 * @template T
 */
class Group {
	/**
	 * @param {Node<T>[]} nodes nodes
	 * @param {number[]} similarities similarities between the nodes (length = nodes.length - 1)
	 */
	constructor(nodes, similarities) {
		this.nodes = nodes;
		this.similarities = similarities;

		// total size of the group, computed once at construction time
		let total = 0;
		nodes.forEach(node => {
			total += node.size;
		});
		this.size = total;

		// starts out unset
		/** @type {string} */
		this.key = undefined;
	}
}
 
- /**
 
-  * @template T
 
-  * @typedef {Object} GroupedItems<T>
 
-  * @property {string} key
 
-  * @property {T[]} items
 
-  * @property {number} size
 
-  */
 
- /**
 
-  * @template T
 
-  * @typedef {Object} Options
 
-  * @property {number} maxSize maximum size of a group
 
-  * @property {number} minSize minimum size of a group (preferred over maximum size)
 
-  * @property {Iterable<T>} items a list of items
 
-  * @property {function(T): number} getSize function to get size of an item
 
-  * @property {function(T): string} getKey function to get the key of an item
 
-  */
 
- /**
 
-  * @template T
 
-  * @param {Options<T>} options options object
 
-  * @returns {GroupedItems<T>[]} grouped items
 
-  */
 
- module.exports = ({ maxSize, minSize, items, getSize, getKey }) => {
 
- 	/** @type {Group<T>[]} */
 
- 	const result = [];
 
- 	const nodes = Array.from(
 
- 		items,
 
- 		item => new Node(item, getKey(item), getSize(item))
 
- 	);
 
- 	/** @type {Node<T>[]} */
 
- 	const initialNodes = [];
 
- 	// lexically ordering of keys
 
- 	nodes.sort((a, b) => {
 
- 		if (a.key < b.key) return -1;
 
- 		if (a.key > b.key) return 1;
 
- 		return 0;
 
- 	});
 
- 	// return nodes bigger than maxSize directly as group
 
- 	for (const node of nodes) {
 
- 		if (node.size >= maxSize) {
 
- 			result.push(new Group([node], []));
 
- 		} else {
 
- 			initialNodes.push(node);
 
- 		}
 
- 	}
 
- 	if (initialNodes.length > 0) {
 
- 		// calculate similarities between lexically adjacent nodes
 
- 		/** @type {number[]} */
 
- 		const similarities = [];
 
- 		for (let i = 1; i < initialNodes.length; i++) {
 
- 			const a = initialNodes[i - 1];
 
- 			const b = initialNodes[i];
 
- 			similarities.push(similarity(a.key, b.key));
 
- 		}
 
- 		const initialGroup = new Group(initialNodes, similarities);
 
- 		if (initialGroup.size < minSize) {
 
- 			// We hit an edgecase where the working set is already smaller than minSize
 
- 			// We merge it with the smallest result node to keep minSize intact
 
- 			if (result.length > 0) {
 
- 				const smallestGroup = result.reduce((min, group) =>
 
- 					min.size > group.size ? group : min
 
- 				);
 
- 				for (const node of initialGroup.nodes) smallestGroup.nodes.push(node);
 
- 				smallestGroup.nodes.sort((a, b) => {
 
- 					if (a.key < b.key) return -1;
 
- 					if (a.key > b.key) return 1;
 
- 					return 0;
 
- 				});
 
- 			} else {
 
- 				// There are no other nodes
 
- 				// We use all nodes and have to accept that it's smaller than minSize
 
- 				result.push(initialGroup);
 
- 			}
 
- 		} else {
 
- 			const queue = [initialGroup];
 
- 			while (queue.length) {
 
- 				const group = queue.pop();
 
- 				// only groups bigger than maxSize need to be splitted
 
- 				if (group.size < maxSize) {
 
- 					result.push(group);
 
- 					continue;
 
- 				}
 
- 				// find unsplittable area from left and right
 
- 				// going minSize from left and right
 
- 				// at least one node need to be included otherwise we get stuck
 
- 				let left = 0;
 
- 				let leftSize = 0;
 
- 				while (leftSize <= minSize) {
 
- 					leftSize += group.nodes[left].size;
 
- 					left++;
 
- 				}
 
- 				let right = group.nodes.length - 1;
 
- 				let rightSize = 0;
 
- 				while (rightSize <= minSize) {
 
- 					rightSize += group.nodes[right].size;
 
- 					right--;
 
- 				}
 
- 				if (left - 1 > right) {
 
- 					// can't split group while holding minSize
 
- 					// because minSize is preferred of maxSize we return
 
- 					// the group here even while it's too big
 
- 					// To avoid this make sure maxSize > minSize * 3
 
- 					result.push(group);
 
- 					continue;
 
- 				}
 
- 				if (left <= right) {
 
- 					// when there is a area between left and right
 
- 					// we look for best split point
 
- 					// we split at the minimum similarity
 
- 					// here key space is separated the most
 
- 					let best = left - 1;
 
- 					let bestSimilarity = group.similarities[best];
 
- 					for (let i = left; i <= right; i++) {
 
- 						const similarity = group.similarities[i];
 
- 						if (similarity < bestSimilarity) {
 
- 							best = i;
 
- 							bestSimilarity = similarity;
 
- 						}
 
- 					}
 
- 					left = best + 1;
 
- 					right = best;
 
- 				}
 
- 				// create two new groups for left and right area
 
- 				// and queue them up
 
- 				const rightNodes = [group.nodes[right + 1]];
 
- 				/** @type {number[]} */
 
- 				const rightSimilaries = [];
 
- 				for (let i = right + 2; i < group.nodes.length; i++) {
 
- 					rightSimilaries.push(group.similarities[i - 1]);
 
- 					rightNodes.push(group.nodes[i]);
 
- 				}
 
- 				queue.push(new Group(rightNodes, rightSimilaries));
 
- 				const leftNodes = [group.nodes[0]];
 
- 				/** @type {number[]} */
 
- 				const leftSimilaries = [];
 
- 				for (let i = 1; i < left; i++) {
 
- 					leftSimilaries.push(group.similarities[i - 1]);
 
- 					leftNodes.push(group.nodes[i]);
 
- 				}
 
- 				queue.push(new Group(leftNodes, leftSimilaries));
 
- 			}
 
- 		}
 
- 	}
 
- 	// lexically ordering
 
- 	result.sort((a, b) => {
 
- 		if (a.nodes[0].key < b.nodes[0].key) return -1;
 
- 		if (a.nodes[0].key > b.nodes[0].key) return 1;
 
- 		return 0;
 
- 	});
 
- 	// give every group a name
 
- 	for (let i = 0; i < result.length; i++) {
 
- 		const group = result[i];
 
- 		const first = group.nodes[0];
 
- 		const last = group.nodes[group.nodes.length - 1];
 
- 		let name = getName(first.key, last.key);
 
- 		group.key = name;
 
- 	}
 
- 	// return the results
 
- 	return result.map(group => {
 
- 		/** @type {GroupedItems} */
 
- 		return {
 
- 			key: group.key,
 
- 			items: group.nodes.map(node => node.item),
 
- 			size: group.size
 
- 		};
 
- 	});
 
- };
 
 
  |