Initial commit of Websearch
This commit is contained in:
54
README.md
54
README.md
@@ -1,3 +1,55 @@
|
|||||||
# websearch
|
# websearch
|
||||||
|
|
||||||
CLI websearch with an LLM summary using SearXNG to get search results.
|
CLI websearch with an LLM summary using SearXNG to get search results.
|
||||||
|
|
||||||
|
Websearch is a Deno project that will grow to provide more and more useful
|
||||||
|
summaries of information on the web for a given topic. It could easily be built
|
||||||
|
with another framework, but this is also an education project to learn the new
|
||||||
|
framework.
|
||||||
|
|
||||||
|
This is an example of using an LLM with knowledge outside of the Model.
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
Websearch is dependent on Ollama running locally with a model intended to perform
|
||||||
|
the analysis/summary of the search information. Websearch is currently
|
||||||
|
using SearXNG, but could be easily ported to other providers.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
If you want to use this in GNU/Linux on an x86_64 platform, a pre-built binary is
|
||||||
|
in the repository. Copy it to a location in your `PATH` like `~/.local/bin/ws`.
|
||||||
|
|
||||||
|
On the first run, you'll need to tell Websearch what Ollama Model to use and
|
||||||
|
where the SearXNG endpoint is with the `--model MODEL` and
|
||||||
|
`--search_url SEARCH_URL` command line flags, as well as provide a search query.
|
||||||
|
```
|
||||||
|
$ ws --model=Ollama3.1 --search_url=http://localhost:8000 \
|
||||||
|
Movies releasing in theaters in 2025
|
||||||
|
```
|
||||||
|
|
||||||
|
### Building for other platforms
|
||||||
|
|
||||||
|
See the [Deno Compile Options](https://docs.deno.com/runtime/reference/cli/compiler/#compile-options)
|
||||||
|
for supported platform targets.
|
||||||
|
|
||||||
|
Example for GNU/Linux x86_64:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ deno compile \
|
||||||
|
--target x86_64-unknown-linux-gnu \
|
||||||
|
--allow-env=HOME,NODE_V8_COVERAGE \
|
||||||
|
--allow-net \
|
||||||
|
--allow-read \
|
||||||
|
--allow-write \
|
||||||
|
main.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Once Websearch runs successfully, the model and search_url are saved to the
|
||||||
|
configuration file at `~/.config/websearch/config.yml`. This is used so
|
||||||
|
subsequent calls can omit the flags.
|
||||||
|
|
||||||
|
If you provide a flag with an updated option, that will be updated in the
|
||||||
|
config.
|
||||||
|
12
deno.json
Normal file
12
deno.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"tasks": {
|
||||||
|
"dev": "deno run --allow-env=HOME,NODE_V8_COVERAGE --allow-net --allow-read --allow-write main.ts",
|
||||||
|
"prod": "deno run --allow-env=HOME,NODE_V8_COVERAGE --allow-net --allow-read --allow-write main.ts"
|
||||||
|
},
|
||||||
|
"imports": {
|
||||||
|
"@std/assert": "jsr:@std/assert@1",
|
||||||
|
"@std/cli": "jsr:@std/cli@^1.0.6",
|
||||||
|
"@std/fs": "jsr:@std/fs@^1.0.5",
|
||||||
|
"@std/yaml": "jsr:@std/yaml@^1.0.5"
|
||||||
|
}
|
||||||
|
}
|
111
main.ts
Normal file
111
main.ts
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
import { parseArgs } from "@std/cli/parse-args";
|
||||||
|
import type { Args } from "@std/cli/parse-args";
|
||||||
|
import { ensureDir } from "@std/fs";
|
||||||
|
import { parse, stringify } from "@std/yaml";
|
||||||
|
import { answerQuery, cleanTextFromHtml, getNewsUrls } from "./websearch.ts";
|
||||||
|
|
||||||
|
/**
 * Persisted CLI configuration, stored as YAML at
 * `~/.config/websearch/config.yml`. Both fields are optional because a
 * first run may supply them via command-line flags instead.
 */
export interface Config {
  /** Ollama model name used to generate the summary. */
  model?: string;
  /** Base URL of the SearXNG endpoint to query for results. */
  search_url?: string;
}
|
||||||
|
|
||||||
|
// CLI arguments: --help is a boolean flag; --model and --search_url take
// string values. Everything else lands in parsed._ as the search query.
const parsed: Args = parseArgs(Deno.args, {
  boolean: ["help"],
  string: ["model", "search_url"],
});

// Print usage text and exit before doing any other work when --help is given.
if (parsed.help) {
  console.log(`
Usage:
websearch [--model MODEL] [--search_url SEARCH_URL] QUERY

Options:
--model Ollama model to use
--search_url URL for SearXNG endpoint

Arguments:
QUERY The topic to search for news to summarize

Configuration:
The websearch configuration file is stored in the XDG Config directory
/home/$USER/.config/websearch/config.yml. Both the model and search_url
can be customized as an alternative to providing the options for each.
If both the Ollama model and SearXNG endpoint are successful, the
configuration is automatically saved/updated.
`);
  Deno.exit();
}
|
||||||
|
|
||||||
|
const configDir = `${Deno.env.get("HOME")}/.config/websearch`;
|
||||||
|
const configPath = `${Deno.env.get("HOME")}/.config/websearch/config.yml`;
|
||||||
|
|
||||||
|
async function loadConfig(): Promise<Config> {
|
||||||
|
try {
|
||||||
|
const yamlString = await Deno.readTextFile(configPath);
|
||||||
|
return parse(yamlString) as Config;
|
||||||
|
} catch {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async function saveConfig(config: Config) {
|
||||||
|
await ensureDir(configDir);
|
||||||
|
await Deno.writeTextFile(
|
||||||
|
configPath,
|
||||||
|
stringify(config),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function main(args: Args) {
|
||||||
|
const query = args._.join(" ");
|
||||||
|
if (!query) {
|
||||||
|
throw new Error("Please provide a search query");
|
||||||
|
}
|
||||||
|
console.log(`Query: ${query}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const config = await loadConfig();
|
||||||
|
if (!config.model) {
|
||||||
|
if (args.model) {
|
||||||
|
config.model = args.model;
|
||||||
|
} else {
|
||||||
|
throw new Error("Provide --model or add Ollama model to configuration");
|
||||||
|
}
|
||||||
|
} else if (args.model && args.model !== config.model) {
|
||||||
|
config.model = args.model;
|
||||||
|
}
|
||||||
|
if (!config.search_url) {
|
||||||
|
if (args.search_url) {
|
||||||
|
config.search_url = args.search_url;
|
||||||
|
} else {
|
||||||
|
throw new Error(
|
||||||
|
"Provide --search_url or add search_url to configuration",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else if (args.search_url && args.search_url !== config.search_url) {
|
||||||
|
config.search_url = args.search_url;
|
||||||
|
}
|
||||||
|
|
||||||
|
const urls = await getNewsUrls(config, query);
|
||||||
|
if (!urls || urls.length === 0) {
|
||||||
|
console.log("No results");
|
||||||
|
Deno.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const cleanedTexts = await Promise.all(
|
||||||
|
urls.map((url) => cleanTextFromHtml(url)),
|
||||||
|
);
|
||||||
|
await answerQuery(config, query, cleanedTexts.join("\n\n"));
|
||||||
|
await saveConfig(config);
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`Error processing query "${query}":`, error.message);
|
||||||
|
Deno.exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (import.meta.main) {
|
||||||
|
main(parsed).catch((error) => {
|
||||||
|
console.error("Unhandled exception:", error);
|
||||||
|
Deno.exit(1);
|
||||||
|
});
|
||||||
|
}
|
82
websearch.ts
Normal file
82
websearch.ts
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
import { Ollama } from "npm:ollama";
|
||||||
|
import * as cheerio from "npm:cheerio@1.0.0";
|
||||||
|
import { Readability } from "jsr:@paoramen/cheer-reader";
|
||||||
|
import type { Config } from "./main.ts";
|
||||||
|
|
||||||
|
export async function getNewsUrls(
|
||||||
|
config: Config,
|
||||||
|
query: string,
|
||||||
|
): Promise<string[] | undefined> {
|
||||||
|
try {
|
||||||
|
const response = await fetch(
|
||||||
|
`${config.search_url}?q=${query}&format=json`,
|
||||||
|
);
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error(
|
||||||
|
`Failed to fetch results for query "${query}": ${response.statusText}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
return data.results
|
||||||
|
.map((result: { url: string }) => result.url)
|
||||||
|
.slice(0, 3);
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(
|
||||||
|
`Error fetching news URLs for query "${query}":`,
|
||||||
|
error.message,
|
||||||
|
);
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function cleanTextFromHtml(url: string): Promise<string> {
|
||||||
|
try {
|
||||||
|
const response = await fetch(url);
|
||||||
|
if (!response.ok) {
|
||||||
|
// ToDo: It would be great to fetch additional sources, or skip to next
|
||||||
|
throw new Error(`Failed to fetch ${url}: ${response.statusText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const html = await response.text();
|
||||||
|
return htmlToText(html).trim();
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error(`Error fetching URL '${url}':`, error.message);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function htmlToText(html: string): string {
|
||||||
|
const $ = cheerio.load(html);
|
||||||
|
return new Readability($).parse().textContent || "";
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function answerQuery(
|
||||||
|
config: Config,
|
||||||
|
query: string,
|
||||||
|
texts: string,
|
||||||
|
) {
|
||||||
|
const ollama = new Ollama();
|
||||||
|
if (!config.model) {
|
||||||
|
throw new Error(`No model in config: ${config}`);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const responseStream = await ollama.generate({
|
||||||
|
model: config.model,
|
||||||
|
prompt:
|
||||||
|
`For the topic of ${query}, provide a summary of the information in the following articles:\n${texts}`,
|
||||||
|
stream: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
for await (const chunk of responseStream) {
|
||||||
|
if (!chunk.done) {
|
||||||
|
await Deno.stdout.write(new TextEncoder().encode(chunk.response));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
console.error("Error answering query:", error.message);
|
||||||
|
throw error;
|
||||||
|
} finally {
|
||||||
|
void ollama;
|
||||||
|
}
|
||||||
|
}
|
BIN
websearch_linux_x68
Executable file
BIN
websearch_linux_x68
Executable file
Binary file not shown.
Reference in New Issue
Block a user