diff --git a/bin/chat.js b/bin/chat.js index 459d2e0a..cfedd225 100644 --- a/bin/chat.js +++ b/bin/chat.js @@ -1,4 +1,6 @@ import readline from 'readline' +import { parseSql } from 'squirreling' +import { runSqlQuery } from './tools/parquetSql.js' import { tools } from './tools/tools.js' /** @type {'text' | 'tool'} */ @@ -277,11 +279,26 @@ export function chat() { rl.close() process.exit() } else if (input) { + // If the input is valid SQL, run it directly without sending to the model + let isSql = false try { - write(colors.user, 'answer: ', colors.normal) - outputMode = 'text' // switch to text output mode - messages.push([{ role: 'user', content: input }]) - await sendMessages(messages) + parseSql({ query: input }) + isSql = true + } catch { + // not SQL + } + + try { + if (isSql) { + write(colors.user, 'answer: ', colors.normal) + const result = await runSqlQuery(input) + write(result) + } else { + write(colors.user, 'answer: ', colors.normal) + outputMode = 'text' // switch to text output mode + messages.push([{ role: 'user', content: input }]) + await sendMessages(messages) + } } catch (error) { console.error(colors.error, '\n' + error) } finally { diff --git a/bin/cli.js b/bin/cli.js index fbaa2009..d2a29973 100755 --- a/bin/cli.js +++ b/bin/cli.js @@ -8,21 +8,18 @@ import { serve } from './serve.js' const updateCheck = checkForUpdates() const arg = process.argv[2] -if (arg === 'chat') { - await updateCheck // wait for update check to finish before chat - chat() -} else if (arg === '--help' || arg === '-H' || arg === '-h') { +if (arg === '--help' || arg === '-H' || arg === '-h') { console.log('Usage:') + console.log(' hyperparam start chat client') console.log(' hyperparam [path] start hyperparam webapp. "path" is a directory or a URL.') - console.log(' defaults to the current directory.') - console.log(' hyperparam chat start chat client') console.log(' ') console.log(' hyperparam -h, --help, give this help list') console.log(' hyperparam -v, --version print program version') } else if (arg === '--version' || arg === '-V' || arg === '-v') { console.log(packageJson.version) } else if (!arg) { - serve(process.cwd(), undefined) // current directory + await updateCheck + chat() } else if (/^https?:\/\//.exec(arg)) { serve(undefined, arg) // url } else { diff --git a/bin/tools/parquetSql.js b/bin/tools/parquetSql.js index 6db4c8f9..3f1c8244 100644 --- a/bin/tools/parquetSql.js +++ b/bin/tools/parquetSql.js @@ -1,11 +1,86 @@ import { asyncBufferFromFile, asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet' import { compressors } from 'hyparquet-compressors' -import { collect, executeSql } from 'squirreling' +import { collect, executeSql, parseSql, planSql } from 'squirreling' import { parquetDataSource } from 'hyperparam' import { markdownTable } from './markdownTable.js' const maxRows = 100 +/** + * Recursively collect table names from all Scan/Count nodes in a query plan. + * + * @param {import('squirreling').QueryPlan} plan + * @returns {Set} + */ +function scanTables(plan) { + /** @type {Set} */ + const tables = new Set() + /** @param {import('squirreling').QueryPlan} node */ + function walk(node) { + if (!node) return + if (node.type === 'Scan' || node.type === 'Count') { + tables.add(node.table) + } else if ('child' in node) { + walk(node.child) + } + if ('left' in node) walk(node.left) + if ('right' in node) walk(node.right) + } + walk(plan) + return tables +} + +/** + * Build an AsyncDataSource for a file path or URL. + * + * @param {string} file + * @returns {Promise} + */ +async function fileToDataSource(file) { + const asyncBuffer = file.startsWith('http://') || file.startsWith('https://') + ? await asyncBufferFromUrl({ url: file }) + : await asyncBufferFromFile(file) + const metadata = await parquetMetadataAsync(asyncBuffer) + return parquetDataSource(asyncBuffer, metadata, compressors) +} + +/** + * Execute a SQL query by extracting table names from the plan and loading them + * as parquet data sources. Returns a formatted result string. + * + * @param {string} query + * @param {boolean} [truncate] + * @returns {Promise} + */ +export async function runSqlQuery(query, truncate = true) { + const startTime = performance.now() + const ast = parseSql({ query }) + const plan = planSql({ query: ast }) + const tableNames = scanTables(plan) + + /** @type {Record} */ + const tables = {} + await Promise.all([...tableNames].map(async name => { + tables[name] = await fileToDataSource(name) + })) + + const results = await collect(executeSql({ tables, query })) + const queryTime = (performance.now() - startTime) / 1000 + + if (results.length === 0) { + return `Query executed successfully but returned no results in ${queryTime.toFixed(1)} seconds.` + } + + const rowCount = results.length + const maxChars = truncate ? 1000 : 10000 + let content = `Query returned ${rowCount} row${rowCount === 1 ? '' : 's'} in ${queryTime.toFixed(1)} seconds.\n\n` + content += markdownTable(results.slice(0, maxRows), maxChars) + if (rowCount > maxRows) { + content += `\n\n... and ${rowCount - maxRows} more row${rowCount - maxRows === 1 ? '' : 's'} (showing first ${maxRows} rows)` + } + return content +} + /** * @import { ToolHandler } from '../types.d.ts' * @type {ToolHandler} diff --git a/package.json b/package.json index 5c773cc3..ed68abd9 100644 --- a/package.json +++ b/package.json @@ -57,39 +57,39 @@ }, "dependencies": { "hightable": "0.26.4", - "hyparquet": "1.25.1", + "hyparquet": "1.25.3", "hyparquet-compressors": "1.1.1", "icebird": "0.3.1", - "squirreling": "0.10.3" + "squirreling": "0.11.2" }, "devDependencies": { - "@storybook/react-vite": "10.2.19", + "@storybook/react-vite": "10.3.3", "@testing-library/react": "16.3.2", "@types/node": "25.5.0", "@types/react": "19.2.14", "@types/react-dom": "19.2.3", - "@vitejs/plugin-react": "5.1.4", - "@vitest/coverage-v8": "4.1.0", + "@vitejs/plugin-react": "6.0.1", + "@vitest/coverage-v8": "4.1.2", "eslint": "9.39.2", "eslint-plugin-react": "7.37.5", "eslint-plugin-react-hooks": "7.0.1", "eslint-plugin-react-refresh": "0.5.2", - "eslint-plugin-storybook": "10.2.19", + "eslint-plugin-storybook": "10.3.3", "globals": "17.4.0", - "jsdom": "29.0.0", + "jsdom": "29.0.1", "nodemon": "3.1.14", "npm-run-all": "4.1.5", "react": "19.2.4", "react-dom": "19.2.4", - "storybook": "10.2.19", - "typescript": "5.9.3", - "typescript-eslint": "8.57.0", - "vite": "7.3.1", - "vitest": "4.1.0" + "storybook": "10.3.3", + "typescript": "6.0.2", + "typescript-eslint": "8.58.0", + "vite": "8.0.3", + "vitest": "4.1.2" }, "peerDependencies": { - "react": "^18.3.1 || ^19", - "react-dom": "^18.3.1 || ^19" + "react": "18.3.1 || ^19", + "react-dom": "18.3.1 || ^19" }, "eslintConfig": { "extends": [ diff --git a/src/components/Json/Json.tsx b/src/components/Json/Json.tsx index 933be489..4bff4a56 100644 --- a/src/components/Json/Json.tsx +++ b/src/components/Json/Json.tsx @@ -96,7 +96,7 @@ function ByteArray({ bytes, label, expandRoot }: { bytes: Uint8Array, label?: st } function CollapsedArray({ array }: {array: unknown[]}): ReactNode { - const { elementRef, width } = useWidth() + const { elementRef, width } = useWidth() const maxCharacterCount = Math.max(20, Math.floor(width / 8)) const separator = ', ' @@ -159,7 +159,7 @@ function JsonArray({ array, label, expandRoot, pageLimit = defaultPageLimit }: { } function CollapsedObject({ obj }: { obj: object }): ReactNode { - const { elementRef, width } = useWidth() + const { elementRef, width } = useWidth() const maxCharacterCount = Math.max(20, Math.floor(width / 8)) const separator = ', ' const kvSeparator = ': ' diff --git a/src/components/ProgressBar/ProgressBar.module.css b/src/components/ProgressBar/ProgressBar.module.css index 27bfb98f..ece59a01 100644 --- a/src/components/ProgressBar/ProgressBar.module.css +++ b/src/components/ProgressBar/ProgressBar.module.css @@ -1,3 +1,12 @@ +@keyframes shimmer { + 0% { + background-position: -1000px; + } + 100% { + background-position: 1000px; + } +} + /* progress bar */ .progressBar { position: fixed; @@ -13,15 +22,6 @@ background-size: 1000px; animation: shimmer 4s infinite linear; - @keyframes shimmer { - 0% { - background-position: -1000px; - } - 100% { - background-position: 1000px; - } - } - & > [role="presentation"] { height: 100%; background-color: #3a4; diff --git a/src/components/TableView/TableView.tsx b/src/components/TableView/TableView.tsx index 86b94e50..11c26b17 100644 --- a/src/components/TableView/TableView.tsx +++ b/src/components/TableView/TableView.tsx @@ -25,7 +25,7 @@ interface Content extends ContentSize { * Table file viewer for parquet, CSV, and JSONL files */ export default function TableView({ source, setProgress, setError }: ViewerProps) { - const [isLoading, setIsLoading] = useState(true) + const [isLoading, setIsLoading] = useState(true) const [content, setContent] = useState() const [cell, setCell] = useState<{ row: number, col: number } | undefined>() const { customClass, routes } = useConfig() diff --git a/src/lib/parquet/parquetDataSource.ts b/src/lib/parquet/parquetDataSource.ts index f34e5d43..c0ca7980 100644 --- a/src/lib/parquet/parquetDataSource.ts +++ b/src/lib/parquet/parquetDataSource.ts @@ -1,5 +1,7 @@ import { parquetReadObjects, parquetSchema } from 'hyparquet' -import type { AsyncBuffer, Compressors, FileMetaData } from 'hyparquet' +import { parquetReadAsync } from 'hyparquet/src/read.js' +import { assembleAsync } from 'hyparquet/src/rowgroup.js' +import type { AsyncBuffer, AsyncRowGroup, Compressors, FileMetaData } from 'hyparquet' import { AsyncDataSource, ScanOptions, asyncRow } from 'squirreling' import { whereToParquetFilter } from './parquetFilter.js' import { extractSpatialFilter, rowGroupOverlaps } from './parquetSpatial.js' @@ -87,5 +89,40 @@ export function parquetDataSource(file: AsyncBuffer, metadata: FileMetaData, com appliedLimitOffset, } }, + + async *scanColumn({ column, limit, offset, signal }) { + const rowStart = offset ?? 0 + const rowEnd = limit !== undefined ? rowStart + limit : undefined + const asyncGroups = parquetReadAsync({ + file, + metadata, + rowStart, + rowEnd, + columns: [column], + compressors, + }) + const schemaTree = parquetSchema(metadata) + const assembled = asyncGroups.map((arg: AsyncRowGroup) => assembleAsync(arg, schemaTree)) + + for (const rg of assembled) { + if (signal?.aborted) throw new DOMException('Aborted', 'AbortError') + const [firstCol] = rg.asyncColumns + if (!firstCol) continue + const { skipped, data } = await firstCol.data + if (signal?.aborted) throw new DOMException('Aborted', 'AbortError') + let dataStart = rg.groupStart + skipped + for (const page of data) { + const pageRows = page.length + const selectStart = Math.max(rowStart - dataStart, 0) + const selectEnd = Math.min((rowEnd ?? Infinity) - dataStart, pageRows) + if (selectEnd > selectStart) { + yield selectStart > 0 || selectEnd < pageRows + ? page.slice(selectStart, selectEnd) + : page + } + dataStart += pageRows + } + } + }, } }