diff --git a/README.md b/README.md
index 5239ed0..6520eaf 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,55 @@
 # websearch
-CLI websearch with an LLM summary using SearXNG to get search results.
\ No newline at end of file
+CLI websearch with an LLM summary using SearXNG to get search results.
+
+Websearch is a Deno project that will grow to provide more and more useful
+summaries of information on the web for a given topic. It could easily be built
+with another framework, but this is also an educational project to learn the new
+framework.
+
+This is an example of using an LLM with knowledge outside of the Model.
+
+## Dependencies
+
+Websearch depends on Ollama running locally with a model intended to perform
+the analysis/summary of the search information. Websearch currently uses
+SearXNG, but could be easily ported to other providers.
+
+## Usage
+
+If you want to use this in GNU/Linux on an x86 platform, a pre-built binary is
+in the repository. Copy it to a location in your `PATH` like `~/.local/bin/ws`.
+
+On the first run, you'll need to tell Websearch what Ollama Model to use and
+where the SearXNG endpoint is with the `--model MODEL` and
+`--search_url SEARCH_URL` command line flags, as well as provide a search query.
+```
+$ ws --model=Ollama3.1 --search_url=http://localhost:8000 \
+  Movies releasing in theaters in 2025
+```
+
+### Building for other platforms
+
+See the [Deno Compile Options](https://docs.deno.com/runtime/reference/cli/compiler/#compile-options)
+for supported platform targets.
+
+Example for GNU/Linux_x86:
+
+```
+$ deno compile \
+  --target x86_64-unknown-linux-gnu \
+  --allow-env=HOME,NODE_V8_COVERAGE \
+  --allow-net \
+  --allow-read \
+  --allow-write \
+  main.ts
+```
+
+## Configuration
+
+Once Websearch runs successfully, the model and search_url are saved to the
+configuration file at `~/.config/websearch/config.yml`. This is used so
+subsequent calls can omit the flags.
+
+If you provide a flag with an updated option, that will be updated in the
+config.
diff --git a/deno.json b/deno.json
new file mode 100644
index 0000000..b1b6d2e
--- /dev/null
+++ b/deno.json
@@ -0,0 +1,12 @@
+{
+  "tasks": {
+    "dev": "deno run --allow-env=HOME,NODE_V8_COVERAGE --allow-net --allow-read --allow-write main.ts",
+    "prod": "deno run --allow-env=HOME,NODE_V8_COVERAGE --allow-net --allow-read --allow-write main.ts"
+  },
+  "imports": {
+    "@std/assert": "jsr:@std/assert@1",
+    "@std/cli": "jsr:@std/cli@^1.0.6",
+    "@std/fs": "jsr:@std/fs@^1.0.5",
+    "@std/yaml": "jsr:@std/yaml@^1.0.5"
+  }
+}
diff --git a/main.ts b/main.ts
new file mode 100644
index 0000000..cf62f28
--- /dev/null
+++ b/main.ts
@@ -0,0 +1,111 @@
+import { parseArgs } from "@std/cli/parse-args";
+import type { Args } from "@std/cli/parse-args";
+import { ensureDir } from "@std/fs";
+import { parse, stringify } from "@std/yaml";
+import { answerQuery, cleanTextFromHtml, getNewsUrls } from "./websearch.ts";
+
+// Persisted user configuration; both fields are optional because either may
+// come from the command line on any given run.
+export interface Config {
+  model?: string;
+  search_url?: string;
+}
+
+const parsed: Args = parseArgs(Deno.args, {
+  boolean: ["help"],
+  string: ["model", "search_url"],
+});
+
+if (parsed.help) {
+  console.log(`
+  Usage:
+    websearch [--model MODEL] [--search_url SEARCH_URL] QUERY
+
+  Options:
+    --model        Ollama model to use
+    --search_url   URL for SearXNG endpoint
+
+  Arguments:
+    QUERY   The topic to search for news to summarize
+
+  Configuration:
+    The websearch configuration file is stored in the XDG Config directory
+    /home/$USER/.config/websearch/config.yml. Both the model and search_url
+    can be customized as an alternative to providing the options for each.
+    If both the Ollama model and SearXNG endpoint are successful, the
+    configuration is automatically saved/updated.
+  `);
+  Deno.exit();
+}
+
+const configDir = `${Deno.env.get("HOME")}/.config/websearch`;
+const configPath = `${Deno.env.get("HOME")}/.config/websearch/config.yml`;
+
+// Load the saved configuration. Returns an empty config when the file is
+// missing or unreadable (e.g. first run), so callers can fall back to flags.
+async function loadConfig(): Promise<Config> {
+  try {
+    const yamlString = await Deno.readTextFile(configPath);
+    return parse(yamlString) as Config;
+  } catch {
+    return {};
+  }
+}
+
+// Persist the configuration as YAML, creating the config directory if needed.
+async function saveConfig(config: Config) {
+  await ensureDir(configDir);
+  await Deno.writeTextFile(
+    configPath,
+    stringify(config),
+  );
+}
+
+// Entry point: resolve model/search_url from flags + saved config, fetch and
+// clean the top search results, stream an LLM summary, then save the config
+// (only reached when the search and model calls both succeeded).
+async function main(args: Args) {
+  const query = args._.join(" ");
+  if (!query) {
+    throw new Error("Please provide a search query");
+  }
+  console.log(`Query: ${query}`);
+
+  try {
+    const config = await loadConfig();
+    // A flag always wins over the stored value; the stored value is only a
+    // fallback when the flag is absent.
+    if (!config.model) {
+      if (args.model) {
+        config.model = args.model;
+      } else {
+        throw new Error("Provide --model or add Ollama model to configuration");
+      }
+    } else if (args.model && args.model !== config.model) {
+      config.model = args.model;
+    }
+    if (!config.search_url) {
+      if (args.search_url) {
+        config.search_url = args.search_url;
+      } else {
+        throw new Error(
+          "Provide --search_url or add search_url to configuration",
+        );
+      }
+    } else if (args.search_url && args.search_url !== config.search_url) {
+      config.search_url = args.search_url;
+    }
+
+    const urls = await getNewsUrls(config, query);
+    if (!urls || urls.length === 0) {
+      console.log("No results");
+      Deno.exit(1);
+    }
+
+    // Fetch and clean all result pages in parallel.
+    const cleanedTexts = await Promise.all(
+      urls.map((url) => cleanTextFromHtml(url)),
+    );
+    await answerQuery(config, query, cleanedTexts.join("\n\n"));
+    await saveConfig(config);
+  } catch (error) {
+    // 'error' is unknown under Deno's strict defaults; narrow before use.
+    console.error(
+      `Error processing query "${query}":`,
+      error instanceof Error ? error.message : String(error),
+    );
+    Deno.exit(1);
+  }
+}
+
+if (import.meta.main) {
+  main(parsed).catch((error) => {
+    console.error("Unhandled exception:", error);
+    Deno.exit(1);
+  });
+}
diff --git a/websearch.ts b/websearch.ts
new file mode 100644
index 0000000..a8b7314
--- /dev/null
+++ b/websearch.ts
@@ -0,0 +1,82 @@
+import { Ollama } from "npm:ollama";
+import * as cheerio from "npm:cheerio@1.0.0";
+import { Readability } from "jsr:@paoramen/cheer-reader";
+import type { Config } from "./main.ts";
+
+// Query the SearXNG endpoint and return the top 3 result URLs, or undefined
+// when the search fails (the caller treats undefined as "no results").
+export async function getNewsUrls(
+  config: Config,
+  query: string,
+): Promise<string[] | undefined> {
+  try {
+    // encodeURIComponent: queries are multi-word and must be URL-escaped.
+    const response = await fetch(
+      `${config.search_url}?q=${encodeURIComponent(query)}&format=json`,
+    );
+    if (!response.ok) {
+      throw new Error(
+        `Failed to fetch results for query "${query}": ${response.statusText}`,
+      );
+    }
+
+    const data = await response.json();
+    return data.results
+      .map((result: { url: string }) => result.url)
+      .slice(0, 3);
+  } catch (error) {
+    console.error(
+      `Error fetching news URLs for query "${query}":`,
+      error instanceof Error ? error.message : String(error),
+    );
+    return undefined;
+  }
+}
+
+// Fetch a result page and reduce its HTML to readable article text.
+// Rethrows on failure so the caller's Promise.all rejects.
+export async function cleanTextFromHtml(url: string): Promise<string> {
+  try {
+    const response = await fetch(url);
+    if (!response.ok) {
+      // ToDo: It would be great to fetch additional sources, or skip to next
+      throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
+    }
+
+    const html = await response.text();
+    return htmlToText(html).trim();
+  } catch (error) {
+    console.error(
+      `Error fetching URL '${url}':`,
+      error instanceof Error ? error.message : String(error),
+    );
+    throw error;
+  }
+}
+
+// Extract the main readable text from an HTML document via Readability.
+function htmlToText(html: string): string {
+  const $ = cheerio.load(html);
+  return new Readability($).parse().textContent || "";
+}
+
+// Stream an Ollama-generated summary of the article texts to stdout.
+export async function answerQuery(
+  config: Config,
+  query: string,
+  texts: string,
+) {
+  const ollama = new Ollama();
+  if (!config.model) {
+    // JSON.stringify: template-interpolated objects print "[object Object]".
+    throw new Error(`No model in config: ${JSON.stringify(config)}`);
+  }
+  try {
+    const responseStream = await ollama.generate({
+      model: config.model,
+      prompt:
+        `For the topic of ${query}, provide a summary of the information in the following articles:\n${texts}`,
+      stream: true,
+    });
+
+    // Write tokens as they arrive; the final done-chunk carries no text.
+    for await (const chunk of responseStream) {
+      if (!chunk.done) {
+        await Deno.stdout.write(new TextEncoder().encode(chunk.response));
+      }
+    }
+  } catch (error) {
+    console.error(
+      "Error answering query:",
+      error instanceof Error ? error.message : String(error),
+    );
+    throw error;
+  }
+}
diff --git a/websearch_linux_x68 b/websearch_linux_x68
new file mode 100755
index 0000000..c54d628
Binary files /dev/null and b/websearch_linux_x68 differ