Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions bin/chat.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import readline from 'readline'
import { parseSql } from 'squirreling'
import { runSqlQuery } from './tools/parquetSql.js'
import { tools } from './tools/tools.js'

/** @type {'text' | 'tool'} */
Expand Down Expand Up @@ -277,11 +279,26 @@ export function chat() {
rl.close()
process.exit()
} else if (input) {
// If the input is valid SQL, run it directly without sending to the model
let isSql = false
try {
write(colors.user, 'answer: ', colors.normal)
outputMode = 'text' // switch to text output mode
messages.push([{ role: 'user', content: input }])
await sendMessages(messages)
parseSql({ query: input })
isSql = true
} catch {
// not SQL
}

try {
if (isSql) {
write(colors.user, 'answer: ', colors.normal)
const result = await runSqlQuery(input)
write(result)
} else {
write(colors.user, 'answer: ', colors.normal)
outputMode = 'text' // switch to text output mode
messages.push([{ role: 'user', content: input }])
await sendMessages(messages)
}
} catch (error) {
console.error(colors.error, '\n' + error)
} finally {
Expand Down
11 changes: 4 additions & 7 deletions bin/cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,18 @@ import { serve } from './serve.js'
const updateCheck = checkForUpdates()

const arg = process.argv[2]
if (arg === 'chat') {
await updateCheck // wait for update check to finish before chat
chat()
} else if (arg === '--help' || arg === '-H' || arg === '-h') {
if (arg === '--help' || arg === '-H' || arg === '-h') {
console.log('Usage:')
console.log(' hyperparam start chat client')
console.log(' hyperparam [path] start hyperparam webapp. "path" is a directory or a URL.')
console.log(' defaults to the current directory.')
console.log(' hyperparam chat start chat client')
console.log(' ')
console.log(' hyperparam -h, --help, give this help list')
console.log(' hyperparam -v, --version print program version')
} else if (arg === '--version' || arg === '-V' || arg === '-v') {
console.log(packageJson.version)
} else if (!arg) {
serve(process.cwd(), undefined) // current directory
await updateCheck
chat()
} else if (/^https?:\/\//.exec(arg)) {
serve(undefined, arg) // url
} else {
Expand Down
77 changes: 76 additions & 1 deletion bin/tools/parquetSql.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,86 @@
import { asyncBufferFromFile, asyncBufferFromUrl, parquetMetadataAsync } from 'hyparquet'
import { compressors } from 'hyparquet-compressors'
import { collect, executeSql } from 'squirreling'
import { collect, executeSql, parseSql, planSql } from 'squirreling'
import { parquetDataSource } from 'hyperparam'
import { markdownTable } from './markdownTable.js'

const maxRows = 100

/**
* Recursively collect table names from all Scan/Count nodes in a query plan.
*
* @param {import('squirreling').QueryPlan} plan
* @returns {Set<string>}
*/
function scanTables(plan) {
/** @type {Set<string>} */
const tables = new Set()
/** @param {import('squirreling').QueryPlan} node */
function walk(node) {
if (!node) return
if (node.type === 'Scan' || node.type === 'Count') {
tables.add(node.table)
} else if ('child' in node) {
walk(node.child)
}
if ('left' in node) walk(node.left)
if ('right' in node) walk(node.right)
}
walk(plan)
return tables
}

/**
 * Open a parquet file and wrap it as an AsyncDataSource.
 *
 * Names beginning with `http://` or `https://` are read via
 * `asyncBufferFromUrl`; anything else is passed to `asyncBufferFromFile`.
 *
 * @param {string} file local path or http(s) URL of the parquet file
 * @returns {Promise<import('squirreling').AsyncDataSource>}
 */
async function fileToDataSource(file) {
  const isRemote = ['http://', 'https://'].some(scheme => file.startsWith(scheme))

  let buffer
  if (isRemote) {
    buffer = await asyncBufferFromUrl({ url: file })
  } else {
    buffer = await asyncBufferFromFile(file)
  }

  // Metadata is read once up front and handed to the data source
  const fileMetadata = await parquetMetadataAsync(buffer)
  return parquetDataSource(buffer, fileMetadata, compressors)
}

/**
 * Run a SQL query whose table names are parquet file paths or URLs.
 *
 * The query is parsed and planned to discover the tables it reads, each table
 * name is opened as a parquet data source, and the query is then executed
 * against those sources. The result is rendered as a short summary line
 * followed by a markdown table of at most `maxRows` rows.
 *
 * @param {string} query SQL text to execute
 * @param {boolean} [truncate] when true (default) the table is rendered with
 *   a 1000 character budget, otherwise 10000
 * @returns {Promise<string>} human-readable summary and table
 */
export async function runSqlQuery(query, truncate = true) {
  const begin = performance.now()

  // Plan the query only to find out which tables it touches
  const names = Array.from(scanTables(planSql({ query: parseSql({ query }) })))

  // Open every referenced table concurrently
  const sources = await Promise.all(names.map(name => fileToDataSource(name)))
  /** @type {Record<string, import('squirreling').AsyncDataSource>} */
  const tables = {}
  names.forEach((name, i) => {
    tables[name] = sources[i]
  })

  const rows = await collect(executeSql({ tables, query }))
  const seconds = ((performance.now() - begin) / 1000).toFixed(1)
  const total = rows.length

  if (total === 0) {
    return `Query executed successfully but returned no results in ${seconds} seconds.`
  }

  const header = `Query returned ${total} row${total === 1 ? '' : 's'} in ${seconds} seconds.\n\n`
  const table = markdownTable(rows.slice(0, maxRows), truncate ? 1000 : 10000)

  // Mention how many rows were left out when the result exceeds maxRows
  const hidden = total - maxRows
  const footer = hidden > 0
    ? `\n\n... and ${hidden} more row${hidden === 1 ? '' : 's'} (showing first ${maxRows} rows)`
    : ''

  return header + table + footer
}

/**
* @import { ToolHandler } from '../types.d.ts'
* @type {ToolHandler}
Expand Down
28 changes: 14 additions & 14 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,39 +57,39 @@
},
"dependencies": {
"hightable": "0.26.4",
"hyparquet": "1.25.1",
"hyparquet": "1.25.3",
"hyparquet-compressors": "1.1.1",
"icebird": "0.3.1",
"squirreling": "0.10.3"
"squirreling": "0.11.2"
},
"devDependencies": {
"@storybook/react-vite": "10.2.19",
"@storybook/react-vite": "10.3.3",
"@testing-library/react": "16.3.2",
"@types/node": "25.5.0",
"@types/react": "19.2.14",
"@types/react-dom": "19.2.3",
"@vitejs/plugin-react": "5.1.4",
"@vitest/coverage-v8": "4.1.0",
"@vitejs/plugin-react": "6.0.1",
"@vitest/coverage-v8": "4.1.2",
"eslint": "9.39.2",
"eslint-plugin-react": "7.37.5",
"eslint-plugin-react-hooks": "7.0.1",
"eslint-plugin-react-refresh": "0.5.2",
"eslint-plugin-storybook": "10.2.19",
"eslint-plugin-storybook": "10.3.3",
"globals": "17.4.0",
"jsdom": "29.0.0",
"jsdom": "29.0.1",
"nodemon": "3.1.14",
"npm-run-all": "4.1.5",
"react": "19.2.4",
"react-dom": "19.2.4",
"storybook": "10.2.19",
"typescript": "5.9.3",
"typescript-eslint": "8.57.0",
"vite": "7.3.1",
"vitest": "4.1.0"
"storybook": "10.3.3",
"typescript": "6.0.2",
"typescript-eslint": "8.58.0",
"vite": "8.0.3",
"vitest": "4.1.2"
},
"peerDependencies": {
"react": "^18.3.1 || ^19",
"react-dom": "^18.3.1 || ^19"
"react": "18.3.1 || ^19",
"react-dom": "18.3.1 || ^19"
},
"eslintConfig": {
"extends": [
Expand Down
4 changes: 2 additions & 2 deletions src/components/Json/Json.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ function ByteArray({ bytes, label, expandRoot }: { bytes: Uint8Array, label?: st
}

function CollapsedArray({ array }: {array: unknown[]}): ReactNode {
const { elementRef, width } = useWidth<HTMLSpanElement>()
const { elementRef, width } = useWidth()
const maxCharacterCount = Math.max(20, Math.floor(width / 8))
const separator = ', '

Expand Down Expand Up @@ -159,7 +159,7 @@ function JsonArray({ array, label, expandRoot, pageLimit = defaultPageLimit }: {
}

function CollapsedObject({ obj }: { obj: object }): ReactNode {
const { elementRef, width } = useWidth<HTMLSpanElement>()
const { elementRef, width } = useWidth()
const maxCharacterCount = Math.max(20, Math.floor(width / 8))
const separator = ', '
const kvSeparator = ': '
Expand Down
18 changes: 9 additions & 9 deletions src/components/ProgressBar/ProgressBar.module.css
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
@keyframes shimmer {
0% {
background-position: -1000px;
}
100% {
background-position: 1000px;
}
}

/* progress bar */
.progressBar {
position: fixed;
Expand All @@ -13,15 +22,6 @@
background-size: 1000px;
animation: shimmer 4s infinite linear;

@keyframes shimmer {
0% {
background-position: -1000px;
}
100% {
background-position: 1000px;
}
}

& > [role="presentation"] {
height: 100%;
background-color: #3a4;
Expand Down
2 changes: 1 addition & 1 deletion src/components/TableView/TableView.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ interface Content extends ContentSize {
* Table file viewer for parquet, CSV, and JSONL files
*/
export default function TableView({ source, setProgress, setError }: ViewerProps) {
const [isLoading, setIsLoading] = useState<boolean>(true)
const [isLoading, setIsLoading] = useState(true)
const [content, setContent] = useState<Content>()
const [cell, setCell] = useState<{ row: number, col: number } | undefined>()
const { customClass, routes } = useConfig()
Expand Down
39 changes: 38 additions & 1 deletion src/lib/parquet/parquetDataSource.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { parquetReadObjects, parquetSchema } from 'hyparquet'
import type { AsyncBuffer, Compressors, FileMetaData } from 'hyparquet'
import { parquetReadAsync } from 'hyparquet/src/read.js'
import { assembleAsync } from 'hyparquet/src/rowgroup.js'
import type { AsyncBuffer, AsyncRowGroup, Compressors, FileMetaData } from 'hyparquet'
import { AsyncDataSource, ScanOptions, asyncRow } from 'squirreling'
import { whereToParquetFilter } from './parquetFilter.js'
import { extractSpatialFilter, rowGroupOverlaps } from './parquetSpatial.js'
Expand Down Expand Up @@ -87,5 +89,40 @@ export function parquetDataSource(file: AsyncBuffer, metadata: FileMetaData, com
appliedLimitOffset,
}
},

/**
 * Stream the values of a single column, one chunk at a time.
 *
 * Requests only `column` from the parquet file for the row window
 * [offset, offset + limit) and yields each chunk of values that overlaps
 * that window, sliced when it only partially overlaps.
 *
 * `offset` defaults to 0; when `limit` is undefined the window is open-ended.
 * Throws a DOMException named 'AbortError' if `signal` is aborted before or
 * after a row group's column data is awaited.
 */
async *scanColumn({ column, limit, offset, signal }) {
// Absolute row window: [rowStart, rowEnd), rowEnd undefined means "to the end"
const rowStart = offset ?? 0
const rowEnd = limit !== undefined ? rowStart + limit : undefined
const asyncGroups = parquetReadAsync({
file,
metadata,
rowStart,
rowEnd,
columns: [column],
compressors,
})
const schemaTree = parquetSchema(metadata)
const assembled = asyncGroups.map((arg: AsyncRowGroup) => assembleAsync(arg, schemaTree))

for (const rg of assembled) {
if (signal?.aborted) throw new DOMException('Aborted', 'AbortError')
// Only one column was requested, so only the first async column is consumed
const [firstCol] = rg.asyncColumns
if (!firstCol) continue
const { skipped, data } = await firstCol.data
// Re-check after the await: the signal may have been aborted while waiting
if (signal?.aborted) throw new DOMException('Aborted', 'AbortError')
// NOTE(review): presumably `skipped` is the number of leading rows of the row
// group that were not returned, so this is the absolute row index of the first
// value in `data` — confirm against hyparquet
let dataStart = rg.groupStart + skipped
for (const page of data) {
const pageRows = page.length
// Window bounds translated to indices relative to this page, clamped to it
const selectStart = Math.max(rowStart - dataStart, 0)
const selectEnd = Math.min((rowEnd ?? Infinity) - dataStart, pageRows)
if (selectEnd > selectStart) {
// Yield the page as-is when it lies fully inside the window, else a slice
yield selectStart > 0 || selectEnd < pageRows
? page.slice(selectStart, selectEnd)
: page
}
dataStart += pageRows
}
}
},
}
}