-
Notifications
You must be signed in to change notification settings - Fork 867
/
Copy pathutils.ts
185 lines (154 loc) · 5.92 KB
/
utils.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
import * as d3 from 'd3';
import Column from './column';
import * as XLSX from 'xlsx';
import { DictTable } from '../components/ComponentType';
import { CoerceType, TestType, Type } from './types';
import { ColumnTable } from './table';
/**
 * Builds a table from the textual content of a file, picking the loader
 * from the file's MIME type.
 *
 * @param title - Name given to the resulting table.
 * @param text - Raw file content.
 * @param fileType - MIME type; `text/csv`, `text/tab-separated-values` and
 *                   `application/json` are handled.
 * @returns The table produced by the matching loader, or `undefined` when
 *          the MIME type is not handled (the delimited-text loader may
 *          itself return `undefined`).
 */
export const loadTextDataWrapper = (title: string, text: string, fileType: string): DictTable | undefined => {
    switch (fileType) {
        case "text/csv":
        case "text/tab-separated-values":
            return createTableFromText(title, text);
        case "application/json":
            // JSON.parse throws on malformed input; the error is not caught here.
            return createTableFromFromObjectArray(title, JSON.parse(text), true);
        default:
            return undefined;
    }
};
/**
 * Parses delimited text (comma or tab separated) into a table.
 *
 * The delimiter is guessed from the text itself: when there is at least one
 * tab per newline on average, the text is parsed as TSV, otherwise as CSV.
 *
 * @param title - Name given to the resulting table.
 * @param text - Raw delimited text, header row first.
 * @returns The parsed table, or `undefined` when `text` is empty or
 *          whitespace-only (a message is logged in that case).
 */
export const createTableFromText = (title: string, text: string): DictTable | undefined => {
    // Nothing to parse: missing or blank input.
    if (!text || text.trim() === '') {
        console.log('Invalid text provided for data. Could not load.');
        return undefined;
    }

    // Tally tabs and newlines to decide which delimiter the text uses.
    let tabCount = 0;
    let newlineCount = 0;
    for (const ch of text) {
        if (ch === '\t') {
            tabCount += 1;
        } else if (ch === '\n') {
            newlineCount += 1;
        }
    }

    // With no newline at all the ratio is Infinity (tabs present) or NaN
    // (no tabs), so single-line input still resolves to TSV or CSV respectively.
    const looksTabSeparated = tabCount / newlineCount >= 1;

    // Both parsers come from d3-dsv; the CSV branch passes an identity row
    // callback, leaving each parsed row object untouched.
    let records;
    if (looksTabSeparated) {
        records = d3.tsvParse(text);
    } else {
        records = d3.dsvFormat(',').parse(text, (record) => record);
    }

    return createTableFromFromObjectArray(title, records, true);
};
/**
 * Builds a `DictTable` from an array of row objects.
 *
 * Column names are read from the keys of the first row only. Each column's
 * type is inferred from its values and the values are coerced to that type.
 * Column names are normalised before use:
 *   - an empty name becomes `c<index>` (with a `_<k>` suffix if that name is
 *     already used by another source column);
 *   - every "." in a name is replaced by "_".
 *
 * @param title - Used as both `id` and `displayId` of the table.
 * @param values - Row objects; an empty array yields a table without columns.
 * @param anchored - Stored as-is on the returned table.
 * @param derive - Optional value stored as-is on the returned table.
 * @returns The assembled table description.
 */
export const createTableFromFromObjectArray = (title: string, values: any[], anchored: boolean, derive?: any): DictTable => {
    const len = values.length;
    let names: string[] = [];
    let cleanNames: string[] = [];
    const columns = new Map<string, Column>();

    if (len) {
        names = Object.keys(values[0]);
        cleanNames = names.map((name, i) => {
            if (name == "") {
                // Synthesize a placeholder that does not clash with a real column name.
                let newName = `c${i}`;
                let k = 0;
                while (names.includes(newName)) {
                    newName = `c${i}_${k}`;
                    k = k + 1;
                }
                return newName;
            }
            // Clean up messy column names. A global pattern is required here:
            // replace() with a string pattern only swaps the FIRST ".", which
            // left names such as "a.b.c" half-cleaned ("a_b.c").
            // NOTE(review): two source names that normalise to the same cleaned
            // name (e.g. "a.b" and "a_b") still overwrite each other in `columns`.
            return name.replace(/\./g, "_");
        });

        for (let i = 0; i < names.length; i++) {
            // Gather the raw values of column i across all rows.
            let col = [];
            for (let r = 0; r < len; r++) {
                col.push(values[r][names[i]]);
            }
            const type = inferTypeFromValueArray(col);
            col = coerceValueArrayFromTypes(col, type);
            columns.set(cleanNames[i], new Column(col, type));
        }
    }

    const columnTable = new ColumnTable(columns, cleanNames);
    const tableNames = columnTable.names();
    return {
        id: title,
        displayId: `${title}`,
        names: tableNames,
        types: tableNames.map(name => (columnTable.column(name) as Column).type),
        rows: columnTable.objects(),
        derive: derive,
        anchored: anchored
    };
};
/**
 * Infers a single type for a column of values.
 *
 * Starts from the candidate list Boolean, Integer, Date, Number, String and
 * drops every candidate whose `TestType` check fails for some non-null value.
 * `null`/`undefined` values are ignored.
 *
 * @param values - The column's raw values.
 * @returns The first remaining candidate in the order above (so an empty or
 *          all-null column yields `Type.Boolean`).
 */
export const inferTypeFromValueArray = (values: any[]): Type => {
    // Candidates in priority order; the first survivor wins.
    let candidates: Type[] = [Type.Boolean, Type.Integer, Type.Date, Type.Number, Type.String];

    for (const value of values) {
        // Missing values carry no type information.
        if (value == null) {
            continue;
        }
        candidates = candidates.filter((candidate: Type) => TestType[candidate](value));
    }

    return candidates[0];
};
/**
 * Maps a column `Type` to its dtype label.
 *
 * @param type - The column type, or `undefined`.
 * @returns `'quantitative'` for Integer/Number, `'boolean'` for Boolean,
 *          `'date'` for Date, and `'nominal'` for anything else
 *          (including `undefined`).
 */
export const convertTypeToDtype = (type: Type | undefined): string => {
    switch (type) {
        case Type.Integer:
        case Type.Number:
            return 'quantitative';
        case Type.Boolean:
            return 'boolean';
        case Type.Date:
            return 'date';
        default:
            return 'nominal';
    }
};
/**
 * Coerces every value of an array with the `CoerceType` converter
 * registered for `type`.
 *
 * @param values - Values to convert; the input array is not modified.
 * @param type - Selects the converter from `CoerceType`.
 * @returns A new array holding the converted values, in the same order.
 */
export const coerceValueArrayFromTypes = (values: any[], type: Type): any[] => {
    const coerceOne = (raw: any): any => CoerceType[type](raw);
    return values.map((raw) => coerceOne(raw));
};
/**
 * Coerces a single value with the `CoerceType` converter registered for `type`.
 *
 * @param value - Value to convert.
 * @param type - Selects the converter from `CoerceType`.
 * @returns Whatever the selected converter returns for `value`.
 */
export const coerceValueFromTypes = (value: any, type: Type): any => {
    const coerced = CoerceType[type](value);
    return coerced;
};
/**
 * Returns the distinct values of an array, keeping first-occurrence order.
 *
 * Distinctness follows `Set` semantics (so `NaN` matches `NaN`, and objects
 * are compared by reference).
 *
 * @param values - Input values; not modified.
 * @returns A new array with duplicates removed.
 */
export const computeUniqueValues = (values: any[]): any[] => {
    const seen = new Set<any>();
    for (const value of values) {
        seen.add(value);
    }
    return Array.from(seen);
};
/**
 * Shallow equality check for two tuples (arrays).
 *
 * Elements are compared position by position with `===`, so nested arrays
 * and objects must be the same reference and `NaN` never equals `NaN`.
 *
 * @param a - First tuple.
 * @param b - Second tuple.
 * @returns `true` when both are the same reference, or have equal length and
 *          strictly equal elements; `false` otherwise (including when
 *          exactly one of them is `null`/`undefined`).
 */
export function tupleEqual(a: any[], b: any[]) {
    // Identical reference: trivially equal.
    if (a === b) {
        return true;
    }
    // A missing side or a length mismatch can never be equal.
    if (a == null || b == null || a.length !== b.length) {
        return false;
    }
    let position = 0;
    while (position < a.length) {
        if (a[position] !== b[position]) {
            return false;
        }
        position += 1;
    }
    return true;
}
/**
 * Loads an Excel workbook from binary content and returns one table per sheet.
 *
 * Each sheet is converted to row objects with `XLSX.utils.sheet_to_json` and
 * turned into a table named `<title>-<sheetName>`.
 *
 * @param title - Prefix for every generated table name.
 * @param arrayBuffer - Binary content of the workbook.
 * @returns The tables in workbook sheet order, or an empty array when
 *          reading or converting fails (the error is logged).
 */
export const loadBinaryDataWrapper = (title: string, arrayBuffer: ArrayBuffer): DictTable[] => {
    try {
        // Parse the Excel file, then build a table for each of its sheets.
        const workbook = XLSX.read(arrayBuffer, { type: 'array' });
        return workbook.SheetNames.map((sheetName: string) => {
            const sheetRows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName]);
            return createTableFromFromObjectArray(`${title}-${sheetName}`, sheetRows, true);
        });
    } catch (error) {
        // Any failure discards all sheets, including ones already converted.
        console.error('Error processing Excel file:', error);
        return [];
    }
};