Initial commit of Websearch

This commit is contained in:
2024-11-10 16:23:51 -07:00
parent 8275fca4a7
commit 60c7822b04
5 changed files with 258 additions and 1 deletions

View File

@@ -1,3 +1,55 @@
# websearch
CLI websearch with an LLM summary using SearXNG to get search results.
Websearch is a Deno project that will grow to provide more and more useful
summaries of information on the web for a given topic. It could easily be built
with another framework, but this is also an education project to learn the new
framework.
This is an example of using an LLM with knowledge outside of the Model.
## Dependencies
Websearch is dependent on Ollama running locally with a model intended to perform
the analysis/summary of the search information. Websearch currently uses
SearXNG, but could be easily ported to other providers.
## Usage
If you want to use this in GNU/Linux on an x86 platform, a pre-built binary is
in the repository. Copy it to a location in your `PATH` like `~/.local/bin/ws`.
On the first run, you'll need to tell Websearch what Ollama Model to use and
where the SearXNG endpoint is with the `--model MODEL` and
`--search_url SEARCH_URL` command line flags, as well as provide a search query.
```
$ ws --model=Ollama3.1 --search_url=http://localhost:8000 \
Movies releasing in theaters in 2025
```
### Building for other platforms
See the [Deno Compile Options](https://docs.deno.com/runtime/reference/cli/compiler/#compile-options)
for supported platform targets.
Example for GNU/Linux_x86:
```
$ deno compile \
--target x86_64-unknown-linux-gnu \
--allow-env=HOME,NODE_V8_COVERAGE \
--allow-net \
--allow-read \
--allow-write \
main.ts
```
## Configuration
Once Websearch runs successfully, the model and search_url are saved to the
configuration file at `~/.config/websearch/config.yml`. This is used so
subsequent calls can omit the flags.
If you provide a flag with an updated option, that will be updated in the
config.

12
deno.json Normal file
View File

@@ -0,0 +1,12 @@
{
"tasks": {
"dev": "deno run --allow-env=HOME,NODE_V8_COVERAGE --allow-net --allow-read --allow-write main.ts",
"prod": "deno run --allow-env=HOME,NODE_V8_COVERAGE --allow-net --allow-read --allow-write main.ts"
},
"imports": {
"@std/assert": "jsr:@std/assert@1",
"@std/cli": "jsr:@std/cli@^1.0.6",
"@std/fs": "jsr:@std/fs@^1.0.5",
"@std/yaml": "jsr:@std/yaml@^1.0.5"
}
}

111
main.ts Normal file
View File

@@ -0,0 +1,111 @@
import { parseArgs } from "@std/cli/parse-args";
import type { Args } from "@std/cli/parse-args";
import { ensureDir } from "@std/fs";
import { parse, stringify } from "@std/yaml";
import { answerQuery, cleanTextFromHtml, getNewsUrls } from "./websearch.ts";
/**
 * Persisted Websearch settings, stored as YAML at
 * `~/.config/websearch/config.yml`. Both fields are optional because the
 * file may not exist yet; main() fills them in from the CLI flags.
 */
export interface Config {
  // Ollama model name used to generate the summary.
  model?: string;
  // Base URL of the SearXNG search endpoint.
  search_url?: string;
}
// Parse CLI input: boolean --help, string --model / --search_url; remaining
// positional arguments (args._) form the search query.
const parsed: Args = parseArgs(Deno.args, {
  boolean: ["help"],
  string: ["model", "search_url"],
});
if (parsed.help) {
  console.log(`
Usage:
websearch [--model MODEL] [--search_url SEARCH_URL] QUERY
Options:
--model Ollama model to use
--search_url URL for SearXNG endpoint
Arguments:
QUERY The topic to search for news to summarize
Configuration:
The websearch configuration file is stored in the XDG Config directory
/home/$USER/.config/websearch/config.yml. Both the model and search_url
can be customized as an alternative to providing the options for each.
If both the Ollama model and SearXNG endpoint are successful, the
configuration is automatically saved/updated.
`);
  Deno.exit();
}
// XDG-style config location. configPath is derived from configDir so the
// two values can never drift apart.
const configDir = `${Deno.env.get("HOME")}/.config/websearch`;
const configPath = `${configDir}/config.yml`;
/**
 * Read the YAML configuration file from disk.
 *
 * Any failure — missing file, unreadable file, malformed YAML — yields an
 * empty config, so a first run with no config file works transparently.
 */
async function loadConfig(): Promise<Config> {
  let raw: string;
  try {
    raw = await Deno.readTextFile(configPath);
  } catch {
    return {};
  }
  try {
    return parse(raw) as Config;
  } catch {
    return {};
  }
}
/**
 * Serialize the config to YAML and write it to configPath, creating the
 * config directory first if it does not exist.
 */
async function saveConfig(config: Config) {
  await ensureDir(configDir);
  const yamlText = stringify(config);
  await Deno.writeTextFile(configPath, yamlText);
}
/**
 * Resolve one config field from the stored config and the CLI flag.
 * The CLI flag wins when provided; otherwise the stored value is used.
 *
 * @throws Error with `missingMessage` when neither source supplies a value.
 */
function resolveOption(
  stored: string | undefined,
  flag: string | undefined,
  missingMessage: string,
): string {
  if (flag) return flag;
  if (stored) return stored;
  throw new Error(missingMessage);
}
/**
 * Entry point: resolve the query and configuration, fetch news URLs from
 * SearXNG, extract readable text from each page, and stream an LLM summary.
 *
 * Exits with status 1 on missing configuration, empty results, or any error.
 */
async function main(args: Args) {
  const query = args._.join(" ");
  if (!query) {
    throw new Error("Please provide a search query");
  }
  console.log(`Query: ${query}`);
  try {
    const config = await loadConfig();
    // CLI flags override stored values; fail when a value is available
    // from neither source.
    config.model = resolveOption(
      config.model,
      args.model,
      "Provide --model or add Ollama model to configuration",
    );
    config.search_url = resolveOption(
      config.search_url,
      args.search_url,
      "Provide --search_url or add search_url to configuration",
    );
    const urls = await getNewsUrls(config, query);
    if (!urls || urls.length === 0) {
      console.log("No results");
      Deno.exit(1);
    }
    // Fetch and clean all result pages in parallel.
    const cleanedTexts = await Promise.all(
      urls.map((url) => cleanTextFromHtml(url)),
    );
    await answerQuery(config, query, cleanedTexts.join("\n\n"));
    // Persist only after a fully successful run so bad flags are not saved.
    await saveConfig(config);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(`Error processing query "${query}":`, message);
    Deno.exit(1);
  }
}
// Run only when executed directly, not when imported as a module.
if (import.meta.main) {
  main(parsed).catch((error) => {
    // Safety net for rejections that escape main()'s own try/catch.
    console.error("Unhandled exception:", error);
    Deno.exit(1);
  });
}

82
websearch.ts Normal file
View File

@@ -0,0 +1,82 @@
import { Ollama } from "npm:ollama";
import * as cheerio from "npm:cheerio@1.0.0";
import { Readability } from "jsr:@paoramen/cheer-reader";
import type { Config } from "./main.ts";
/**
 * Query the SearXNG endpoint for results matching `query` and return up to
 * the first three result URLs.
 *
 * @returns the URLs, or undefined (after logging) when the request or
 *   response parsing fails, so callers can treat a failed search like an
 *   empty one.
 */
export async function getNewsUrls(
  config: Config,
  query: string,
): Promise<string[] | undefined> {
  try {
    // Encode the query so spaces, '&', '?', etc. cannot corrupt the URL.
    const response = await fetch(
      `${config.search_url}?q=${encodeURIComponent(query)}&format=json`,
    );
    if (!response.ok) {
      throw new Error(
        `Failed to fetch results for query "${query}": ${response.statusText}`,
      );
    }
    const data = await response.json();
    return data.results
      .map((result: { url: string }) => result.url)
      .slice(0, 3);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(
      `Error fetching news URLs for query "${query}":`,
      message,
    );
    return undefined;
  }
}
/**
 * Fetch `url` and return the readable article text, trimmed.
 *
 * @throws the fetch/HTTP error after logging it with the offending URL.
 */
export async function cleanTextFromHtml(url: string): Promise<string> {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      // ToDo: It would be great to fetch additional sources, or skip to next
      throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
    }
    const html = await response.text();
    return htmlToText(html).trim();
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error(`Error fetching URL '${url}':`, message);
    throw error;
  }
}
/**
 * Extract the readable article text from an HTML document using cheerio
 * plus the Readability port; returns "" when nothing is extracted.
 */
function htmlToText(html: string): string {
  const $doc = cheerio.load(html);
  const article = new Readability($doc).parse();
  return article.textContent || "";
}
/**
 * Stream an Ollama-generated summary of `texts` for `query` to stdout.
 *
 * @throws when config.model is unset or the Ollama request fails
 *   (the failure is logged before rethrowing).
 */
export async function answerQuery(
  config: Config,
  query: string,
  texts: string,
) {
  const ollama = new Ollama();
  if (!config.model) {
    // JSON.stringify so the config contents appear instead of "[object Object]".
    throw new Error(`No model in config: ${JSON.stringify(config)}`);
  }
  try {
    const responseStream = await ollama.generate({
      model: config.model,
      prompt:
        `For the topic of ${query}, provide a summary of the information in the following articles:\n${texts}`,
      stream: true,
    });
    // One encoder serves every chunk; no need to allocate per iteration.
    const encoder = new TextEncoder();
    for await (const chunk of responseStream) {
      if (!chunk.done) {
        await Deno.stdout.write(encoder.encode(chunk.response));
      }
    }
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    console.error("Error answering query:", message);
    throw error;
  }
}

BIN
websearch_linux_x68 Executable file

Binary file not shown.