Initial commit of Websearch
This commit is contained in:
54
README.md
54
README.md
@@ -1,3 +1,55 @@
|
||||
# websearch
|
||||
|
||||
CLI websearch with an LLM summary using SearXNG to get search results.
|
||||
CLI websearch with an LLM summary using SearXNG to get search results.
|
||||
|
||||
Websearch is a Deno project that will grow to provide more and more useful
|
||||
summaries of information on the web for a given topic. It could easily be built
|
||||
with another framework, but this is also an education project to learn the new
|
||||
framework.
|
||||
|
||||
This is an example of using an LLM with knowledge outside of the Model.
|
||||
|
||||
## Dependencies
|
||||
|
||||
Websearch is dependent on Ollama running locally with a model intended to perform
|
||||
the analysis/summary of the search information. That websearch is right now
|
||||
using SearXNG, but could be easily ported to other providers.
|
||||
|
||||
## Usage
|
||||
|
||||
If you want to use this in GNU/Linux on an x86_64 platform, a pre-built binary is
|
||||
in the repository. Copy it to a location in your `PATH` like `~/.local/bin/ws`.
|
||||
|
||||
On the first run, you'll need to tell Websearch what Ollama Model to use and
|
||||
where the SearXNG endpoint is with the `--model MODEL` and
|
||||
`--search_url SEARCH_URL` command line flags, as well as provide a search query.
|
||||
```
|
||||
$ ws --model=Ollama3.1 --search_url=http://localhost:8000 \
|
||||
Movies releasing in theaters in 2025
|
||||
```
|
||||
|
||||
### Building for other platforms
|
||||
|
||||
See the [Deno Compile Options](https://docs.deno.com/runtime/reference/cli/compiler/#compile-options)
|
||||
for supported platform targets.
|
||||
|
||||
Example for GNU/Linux_x86_64:
|
||||
|
||||
```
|
||||
$ deno compile \
|
||||
--target x86_64-unknown-linux-gnu \
|
||||
--allow-env=HOME,NODE_V8_COVERAGE \
|
||||
--allow-net \
|
||||
--allow-read \
|
||||
--allow-write \
|
||||
main.ts
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Once Websearch runs successfully, the model and search_url are saved to the
|
||||
configuration file at `~/.config/websearch/config.yml`. This is used so
|
||||
subsequent calls can omit the flags.
|
||||
|
||||
If you provide a flag with an updated option, that will be updated in the
|
||||
config.
|
||||
|
12
deno.json
Normal file
12
deno.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"tasks": {
|
||||
"dev": "deno run --allow-env=HOME,NODE_V8_COVERAGE --allow-net --allow-read --allow-write main.ts",
|
||||
"prod": "deno run --allow-env=HOME,NODE_V8_COVERAGE --allow-net --allow-read --allow-write main.ts"
|
||||
},
|
||||
"imports": {
|
||||
"@std/assert": "jsr:@std/assert@1",
|
||||
"@std/cli": "jsr:@std/cli@^1.0.6",
|
||||
"@std/fs": "jsr:@std/fs@^1.0.5",
|
||||
"@std/yaml": "jsr:@std/yaml@^1.0.5"
|
||||
}
|
||||
}
|
111
main.ts
Normal file
111
main.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
import { parseArgs } from "@std/cli/parse-args";
|
||||
import type { Args } from "@std/cli/parse-args";
|
||||
import { ensureDir } from "@std/fs";
|
||||
import { parse, stringify } from "@std/yaml";
|
||||
import { answerQuery, cleanTextFromHtml, getNewsUrls } from "./websearch.ts";
|
||||
|
||||
/**
 * Persisted settings, saved as YAML (see saveConfig) to
 * ~/.config/websearch/config.yml. Both fields are optional because on a
 * first run they are supplied via CLI flags rather than the file.
 */
export interface Config {
  /** Ollama model name used to generate the summary. */
  model?: string;
  /** Base URL of the SearXNG search endpoint. */
  search_url?: string;
}
|
||||
|
||||
// Parse CLI flags once at module load. `parsed` is consumed by the --help
// branch below and passed to main() at the bottom of the file.
const parsed: Args = parseArgs(Deno.args, {
  boolean: ["help"],
  string: ["model", "search_url"],
});
|
||||
|
||||
// --help short-circuits everything else: print usage and exit with status 0.
if (parsed.help) {
  console.log(`
Usage:
websearch [--model MODEL] [--search_url SEARCH_URL] QUERY

Options:
--model Ollama model to use
--search_url URL for SearXNG endpoint

Arguments:
QUERY The topic to search for news to summarize

Configuration:
The websearch configuration file is stored in the XDG Config directory
/home/$USER/.config/websearch/config.yml. Both the model and search_url
can be customized as an alternative to providing the options for each.
If both the Ollama model and SearXNG endpoint are successful, the
configuration is automatically saved/updated.
`);
  Deno.exit();
}
|
||||
|
||||
const configDir = `${Deno.env.get("HOME")}/.config/websearch`;
|
||||
const configPath = `${Deno.env.get("HOME")}/.config/websearch/config.yml`;
|
||||
|
||||
async function loadConfig(): Promise<Config> {
|
||||
try {
|
||||
const yamlString = await Deno.readTextFile(configPath);
|
||||
return parse(yamlString) as Config;
|
||||
} catch {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
async function saveConfig(config: Config) {
|
||||
await ensureDir(configDir);
|
||||
await Deno.writeTextFile(
|
||||
configPath,
|
||||
stringify(config),
|
||||
);
|
||||
}
|
||||
|
||||
async function main(args: Args) {
|
||||
const query = args._.join(" ");
|
||||
if (!query) {
|
||||
throw new Error("Please provide a search query");
|
||||
}
|
||||
console.log(`Query: ${query}`);
|
||||
|
||||
try {
|
||||
const config = await loadConfig();
|
||||
if (!config.model) {
|
||||
if (args.model) {
|
||||
config.model = args.model;
|
||||
} else {
|
||||
throw new Error("Provide --model or add Ollama model to configuration");
|
||||
}
|
||||
} else if (args.model && args.model !== config.model) {
|
||||
config.model = args.model;
|
||||
}
|
||||
if (!config.search_url) {
|
||||
if (args.search_url) {
|
||||
config.search_url = args.search_url;
|
||||
} else {
|
||||
throw new Error(
|
||||
"Provide --search_url or add search_url to configuration",
|
||||
);
|
||||
}
|
||||
} else if (args.search_url && args.search_url !== config.search_url) {
|
||||
config.search_url = args.search_url;
|
||||
}
|
||||
|
||||
const urls = await getNewsUrls(config, query);
|
||||
if (!urls || urls.length === 0) {
|
||||
console.log("No results");
|
||||
Deno.exit(1);
|
||||
}
|
||||
|
||||
const cleanedTexts = await Promise.all(
|
||||
urls.map((url) => cleanTextFromHtml(url)),
|
||||
);
|
||||
await answerQuery(config, query, cleanedTexts.join("\n\n"));
|
||||
await saveConfig(config);
|
||||
} catch (error: any) {
|
||||
console.error(`Error processing query "${query}":`, error.message);
|
||||
Deno.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point: run only when executed directly (not when imported, e.g. for
// the Config type). Any rejection that escapes main() is reported here.
if (import.meta.main) {
  main(parsed).catch((error) => {
    console.error("Unhandled exception:", error);
    Deno.exit(1);
  });
}
|
82
websearch.ts
Normal file
82
websearch.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import { Ollama } from "npm:ollama";
|
||||
import * as cheerio from "npm:cheerio@1.0.0";
|
||||
import { Readability } from "jsr:@paoramen/cheer-reader";
|
||||
import type { Config } from "./main.ts";
|
||||
|
||||
export async function getNewsUrls(
|
||||
config: Config,
|
||||
query: string,
|
||||
): Promise<string[] | undefined> {
|
||||
try {
|
||||
const response = await fetch(
|
||||
`${config.search_url}?q=${query}&format=json`,
|
||||
);
|
||||
if (!response.ok) {
|
||||
throw new Error(
|
||||
`Failed to fetch results for query "${query}": ${response.statusText}`,
|
||||
);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return data.results
|
||||
.map((result: { url: string }) => result.url)
|
||||
.slice(0, 3);
|
||||
} catch (error: any) {
|
||||
console.error(
|
||||
`Error fetching news URLs for query "${query}":`,
|
||||
error.message,
|
||||
);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
export async function cleanTextFromHtml(url: string): Promise<string> {
|
||||
try {
|
||||
const response = await fetch(url);
|
||||
if (!response.ok) {
|
||||
// ToDo: It would be great to fetch additional sources, or skip to next
|
||||
throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
|
||||
}
|
||||
|
||||
const html = await response.text();
|
||||
return htmlToText(html).trim();
|
||||
} catch (error: any) {
|
||||
console.error(`Error fetching URL '${url}':`, error.message);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
function htmlToText(html: string): string {
|
||||
const $ = cheerio.load(html);
|
||||
return new Readability($).parse().textContent || "";
|
||||
}
|
||||
|
||||
export async function answerQuery(
|
||||
config: Config,
|
||||
query: string,
|
||||
texts: string,
|
||||
) {
|
||||
const ollama = new Ollama();
|
||||
if (!config.model) {
|
||||
throw new Error(`No model in config: ${config}`);
|
||||
}
|
||||
try {
|
||||
const responseStream = await ollama.generate({
|
||||
model: config.model,
|
||||
prompt:
|
||||
`For the topic of ${query}, provide a summary of the information in the following articles:\n${texts}`,
|
||||
stream: true,
|
||||
});
|
||||
|
||||
for await (const chunk of responseStream) {
|
||||
if (!chunk.done) {
|
||||
await Deno.stdout.write(new TextEncoder().encode(chunk.response));
|
||||
}
|
||||
}
|
||||
} catch (error: any) {
|
||||
console.error("Error answering query:", error.message);
|
||||
throw error;
|
||||
} finally {
|
||||
void ollama;
|
||||
}
|
||||
}
|
BIN
websearch_linux_x68
Executable file
BIN
websearch_linux_x68
Executable file
Binary file not shown.
Reference in New Issue
Block a user