diff --git a/backend.dockerfile b/backend.dockerfile
index 4886573..0169218 100644
--- a/backend.dockerfile
+++ b/backend.dockerfile
@@ -1,6 +1,7 @@
 FROM node:slim
 
 ARG SEARXNG_API_URL
+ENV SEARXNG_API_URL=${SEARXNG_API_URL}
 
 WORKDIR /home/perplexica
 
@@ -11,8 +12,6 @@ COPY drizzle.config.ts /home/perplexica/
 COPY package.json /home/perplexica/
 COPY yarn.lock /home/perplexica/
 
-RUN sed -i "s|SEARXNG = \".*\"|SEARXNG = \"${SEARXNG_API_URL}\"|g" /home/perplexica/config.toml
-
 RUN mkdir /home/perplexica/data
 
 RUN yarn install
diff --git a/docker-compose.yaml b/docker-compose.yaml
index d6f9203..d3892e5 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -21,6 +21,7 @@ services:
       - 3001:3001
     volumes:
       - backend-dbstore:/home/perplexica/data
+      - ./config.toml:/home/perplexica/config.toml
     extra_hosts:
       - 'host.docker.internal:host-gateway'
     networks:
diff --git a/docs/installation/UPDATING.md b/docs/installation/UPDATING.md
index fa89b41..df67775 100644
--- a/docs/installation/UPDATING.md
+++ b/docs/installation/UPDATING.md
@@ -17,6 +17,7 @@ To update Perplexica to the latest version, follow these steps:
    ```bash
    docker compose up -d --build
    ```
 
+4. Once the command completes running, go to http://localhost:3000 and verify the latest changes.
 
 ## For non Docker users
diff --git a/package.json b/package.json
index 3c74d80..72d411c 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "perplexica-backend",
-  "version": "1.8.0",
+  "version": "1.9.0-rc2",
   "license": "MIT",
   "author": "ItzCrazyKns",
   "scripts": {
@@ -15,6 +15,8 @@
     "@types/better-sqlite3": "^7.6.10",
     "@types/cors": "^2.8.17",
     "@types/express": "^4.17.21",
+    "@types/html-to-text": "^9.0.4",
+    "@types/pdf-parse": "^1.1.4",
     "@types/readable-stream": "^4.0.11",
     "drizzle-kit": "^0.22.7",
     "nodemon": "^3.1.0",
@@ -37,7 +39,9 @@
     "dotenv": "^16.4.5",
     "drizzle-orm": "^0.31.2",
     "express": "^4.19.2",
+    "html-to-text": "^9.0.5",
     "langchain": "^0.1.30",
+    "pdf-parse": "^1.1.1",
     "winston": "^3.13.0",
     "ws": "^8.17.1",
     "zod": "^3.22.4"
diff --git a/searxng/settings.yml b/searxng/settings.yml
index da973c1..54d27c4 100644
--- a/searxng/settings.yml
+++ b/searxng/settings.yml
@@ -1,2356 +1,17 @@
-general:
-  # Debug mode, only for development. Is overwritten by ${SEARXNG_DEBUG}
-  debug: false
-  # displayed name
-  instance_name: 'searxng'
-  # For example: https://example.com/privacy
-  privacypolicy_url: false
-  # use true to use your own donation page written in searx/info/en/donate.md
-  # use false to disable the donation link
-  donation_url: false
-  # mailto:contact@example.com
-  contact_url: false
-  # record stats
-  enable_metrics: true
+use_default_settings: true
 
-brand:
-  new_issue_url: https://github.com/searxng/searxng/issues/new
-  docs_url: https://docs.searxng.org/
-  public_instances: https://searx.space
-  wiki_url: https://github.com/searxng/searxng/wiki
-  issue_url: https://github.com/searxng/searxng/issues
-  # custom:
-  #   maintainer: "Jon Doe"
-  #   # Custom entries in the footer: [title]: [link]
-  #   links:
-  #     Uptime: https://uptime.searxng.org/history/darmarit-org
-  #     About: "https://searxng.org"
+general:
+  instance_name: 'searxng'
 
 search:
-  # Filter results. 0: None, 1: Moderate, 2: Strict
-  safe_search: 0
-  # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "yandex", "mwmbl",
-  # "seznam", "startpage", "stract", "swisscows", "qwant", "wikipedia" - leave blank to turn it off
-  # by default.
autocomplete: 'google' - # minimun characters to type before autocompleter starts - autocomplete_min: 4 - # Default search language - leave blank to detect from browser information or - # use codes from 'languages.py' - default_lang: 'auto' - # max_page: 0 # if engine supports paging, 0 means unlimited numbers of pages - # Available languages - # languages: - # - all - # - en - # - en-US - # - de - # - it-IT - # - fr - # - fr-BE - # ban time in seconds after engine errors - ban_time_on_fail: 5 - # max ban time in seconds after engine errors - max_ban_time_on_fail: 120 - suspended_times: - # Engine suspension time after error (in seconds; set to 0 to disable) - # For error "Access denied" and "HTTP error [402, 403]" - SearxEngineAccessDenied: 86400 - # For error "CAPTCHA" - SearxEngineCaptcha: 86400 - # For error "Too many request" and "HTTP error 429" - SearxEngineTooManyRequests: 3600 - # Cloudflare CAPTCHA - cf_SearxEngineCaptcha: 1296000 - cf_SearxEngineAccessDenied: 86400 - # ReCAPTCHA - recaptcha_SearxEngineCaptcha: 604800 - - # remove format to deny access, use lower case. - # formats: [html, csv, json, rss] formats: - html - json server: - # Is overwritten by ${SEARXNG_PORT} and ${SEARXNG_BIND_ADDRESS} - port: 8888 - bind_address: '127.0.0.1' - # public URL of the instance, to ensure correct inbound links. Is overwritten - # by ${SEARXNG_URL}. - base_url: / # "http://example.com/location" - limiter: false # rate limit the number of request on the instance, block some bots - public_instance: false # enable features designed only for public instances - - # If your instance owns a /etc/searxng/settings.yml file, then set the following - # values there. - secret_key: 'a2fb23f1b02e6ee83875b09826990de0f6bd908b6638e8c10277d415f6ab852b' # Is overwritten by ${SEARXNG_SECRET} - # Proxying image results through searx - image_proxy: false - # 1.0 and 1.1 are supported - http_protocol_version: '1.0' - # POST queries are more secure as they don't show up in history but may cause - # problems when using Firefox containers - method: 'POST' - default_http_headers: - X-Content-Type-Options: nosniff - X-Download-Options: noopen - X-Robots-Tag: noindex, nofollow - Referrer-Policy: no-referrer - -redis: - # URL to connect redis database. Is overwritten by ${SEARXNG_REDIS_URL}. - # https://docs.searxng.org/admin/settings/settings_redis.html#settings-redis - url: false - -ui: - # Custom static path - leave it blank if you didn't change - static_path: '' - static_use_hash: false - # Custom templates path - leave it blank if you didn't change - templates_path: '' - # query_in_title: When true, the result page's titles contains the query - # it decreases the privacy, since the browser can records the page titles. - query_in_title: false - # infinite_scroll: When true, automatically loads the next page when scrolling to bottom of the current page. - infinite_scroll: false - # ui theme - default_theme: simple - # center the results ? - center_alignment: false - # URL prefix of the internet archive, don't forget trailing slash (if needed). - # cache_url: "https://webcache.googleusercontent.com/search?q=cache:" - # Default interface locale - leave blank to detect from browser information or - # use codes from the 'locales' config section - default_locale: '' - # Open result links in a new tab by default - # results_on_new_tab: false - theme_args: - # style of simple theme: auto, light, dark - simple_style: auto - # Perform search immediately if a category selected. 
- # Disable to select multiple categories at once and start the search manually. - search_on_category_select: true - # Hotkeys: default or vim - hotkeys: default - -# Lock arbitrary settings on the preferences page. To find the ID of the user -# setting you want to lock, check the ID of the form on the page "preferences". -# -# preferences: -# lock: -# - language -# - autocomplete -# - method -# - query_in_title - -# searx supports result proxification using an external service: -# https://github.com/asciimoo/morty uncomment below section if you have running -# morty proxy the key is base64 encoded (keep the !!binary notation) -# Note: since commit af77ec3, morty accepts a base64 encoded key. -# -# result_proxy: -# url: http://127.0.0.1:3000/ -# # the key is a base64 encoded string, the YAML !!binary prefix is optional -# key: !!binary "your_morty_proxy_key" -# # [true|false] enable the "proxy" button next to each result -# proxify_results: true - -# communication with search engines -# -outgoing: - # default timeout in seconds, can be override by engine - request_timeout: 3.0 - # the maximum timeout in seconds - # max_request_timeout: 10.0 - # suffix of searx_useragent, could contain information like an email address - # to the administrator - useragent_suffix: '' - # The maximum number of concurrent connections that may be established. - pool_connections: 100 - # Allow the connection pool to maintain keep-alive connections below this - # point. - pool_maxsize: 20 - # See https://www.python-httpx.org/http2/ - enable_http2: true - # uncomment below section if you want to use a custom server certificate - # see https://www.python-httpx.org/advanced/#changing-the-verification-defaults - # and https://www.python-httpx.org/compatibility/#ssl-configuration - # verify: ~/.mitmproxy/mitmproxy-ca-cert.cer - # - # uncomment below section if you want to use a proxyq see: SOCKS proxies - # https://2.python-requests.org/en/latest/user/advanced/#proxies - # are also supported: see - # https://2.python-requests.org/en/latest/user/advanced/#socks - # - # proxies: - # all://: - # - http://proxy1:8080 - # - http://proxy2:8080 - # - # using_tor_proxy: true - # - # Extra seconds to add in order to account for the time taken by the proxy - # - # extra_proxy_timeout: 10.0 - # - # uncomment below section only if you have more than one network interface - # which can be the source of outgoing search requests - # - # source_ips: - # - 1.1.1.1 - # - 1.1.1.2 - # - fe80::/126 - -# External plugin configuration, for more details see -# https://docs.searxng.org/dev/plugins.html -# -# plugins: -# - plugin1 -# - plugin2 -# - ... - -# Comment or un-comment plugin to activate / deactivate by default. -# -# enabled_plugins: -# # these plugins are enabled if nothing is configured .. -# - 'Hash plugin' -# - 'Self Information' -# - 'Tracker URL remover' -# - 'Ahmia blacklist' # activation depends on outgoing.using_tor_proxy -# # these plugins are disabled if nothing is configured .. -# - 'Hostname replace' # see hostname_replace configuration below -# - 'Open Access DOI rewrite' -# - 'Tor check plugin' -# # Read the docs before activate: auto-detection of the language could be -# # detrimental to users expectations / users can activate the plugin in the -# # preferences if they want. 
-# - 'Autodetect search language' - -# Configuration of the "Hostname replace" plugin: -# -# hostname_replace: -# '(.*\.)?youtube\.com$': 'invidious.example.com' -# '(.*\.)?youtu\.be$': 'invidious.example.com' -# '(.*\.)?youtube-noocookie\.com$': 'yotter.example.com' -# '(.*\.)?reddit\.com$': 'teddit.example.com' -# '(.*\.)?redd\.it$': 'teddit.example.com' -# '(www\.)?twitter\.com$': 'nitter.example.com' -# # to remove matching host names from result list, set value to false -# 'spam\.example\.com': false - -checker: - # disable checker when in debug mode - off_when_debug: true - - # use "scheduling: false" to disable scheduling - # scheduling: interval or int - - # to activate the scheduler: - # * uncomment "scheduling" section - # * add "cache2 = name=searxngcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" - # to your uwsgi.ini - - # scheduling: - # start_after: [300, 1800] # delay to start the first run of the checker - # every: [86400, 90000] # how often the checker runs - - # additional tests: only for the YAML anchors (see the engines section) - # - additional_tests: - rosebud: &test_rosebud - matrix: - query: rosebud - lang: en - result_container: - - not_empty - - ['one_title_contains', 'citizen kane'] - test: - - unique_results - - android: &test_android - matrix: - query: ['android'] - lang: ['en', 'de', 'fr', 'zh-CN'] - result_container: - - not_empty - - ['one_title_contains', 'google'] - test: - - unique_results - - # tests: only for the YAML anchors (see the engines section) - tests: - infobox: &tests_infobox - infobox: - matrix: - query: ['linux', 'new york', 'bbc'] - result_container: - - has_infobox - -categories_as_tabs: - general: - images: - videos: - news: - map: - music: - it: - science: - files: - social media: engines: - - name: 9gag - engine: 9gag - shortcut: 9g - disabled: true - - - name: annas archive - engine: annas_archive - disabled: true - shortcut: aa - - # - name: annas articles - # engine: annas_archive - # shortcut: aaa - # # https://docs.searxng.org/dev/engines/online/annas_archive.html - # aa_content: 'journal_article' # book_any .. magazine, standards_document - # aa_ext: 'pdf' # pdf, epub, .. 
- # aa_sort: 'newest' # newest, oldest, largest, smallest - - - name: apk mirror - engine: apkmirror - timeout: 4.0 - shortcut: apkm - disabled: true - - - name: apple app store - engine: apple_app_store - shortcut: aps - disabled: true - - # Requires Tor - - name: ahmia - engine: ahmia - categories: onions - enable_http: true - shortcut: ah - - - name: anaconda - engine: xpath - paging: true - first_page_num: 0 - search_url: https://anaconda.org/search?q={query}&page={pageno} - results_xpath: //tbody/tr - url_xpath: ./td/h5/a[last()]/@href - title_xpath: ./td/h5 - content_xpath: ./td[h5]/text() - categories: it - timeout: 6.0 - shortcut: conda - disabled: true - - - name: arch linux wiki - engine: archlinux - shortcut: al - - - name: artic - engine: artic - shortcut: arc - timeout: 4.0 - - - name: arxiv - engine: arxiv - shortcut: arx - timeout: 4.0 - - - name: ask - engine: ask - shortcut: ask - disabled: true - - # tmp suspended: dh key too small - # - name: base - # engine: base - # shortcut: bs - - - name: bandcamp - engine: bandcamp - shortcut: bc - categories: music - - - name: wikipedia - engine: wikipedia - shortcut: wp - # add "list" to the array to get results in the results list - display_type: ['infobox'] - base_url: 'https://{language}.wikipedia.org/' - categories: [general] - - - name: bilibili - engine: bilibili - shortcut: bil - disabled: true - - - name: bing - engine: bing - shortcut: bi - disabled: true - - - name: bing images - engine: bing_images - shortcut: bii - - - name: bing news - engine: bing_news - shortcut: bin - - - name: bing videos - engine: bing_videos - shortcut: biv - - - name: bitbucket - engine: xpath - paging: true - search_url: https://bitbucket.org/repo/all/{pageno}?name={query} - url_xpath: //article[@class="repo-summary"]//a[@class="repo-link"]/@href - title_xpath: //article[@class="repo-summary"]//a[@class="repo-link"] - content_xpath: //article[@class="repo-summary"]/p - categories: [it, repos] - timeout: 4.0 - disabled: true - shortcut: bb - about: - website: https://bitbucket.org/ - wikidata_id: Q2493781 - official_api_documentation: https://developer.atlassian.com/bitbucket - use_official_api: false - require_api_key: false - results: HTML - - - name: bpb - engine: bpb - shortcut: bpb - disabled: true - - - name: btdigg - engine: btdigg - shortcut: bt - disabled: true - - - name: ccc-tv - engine: xpath - paging: false - search_url: https://media.ccc.de/search/?q={query} - url_xpath: //div[@class="caption"]/h3/a/@href - title_xpath: //div[@class="caption"]/h3/a/text() - content_xpath: //div[@class="caption"]/h4/@title - categories: videos - disabled: true - shortcut: c3tv - about: - website: https://media.ccc.de/ - wikidata_id: Q80729951 - official_api_documentation: https://github.com/voc/voctoweb - use_official_api: false - require_api_key: false - results: HTML - # We don't set language: de here because media.ccc.de is not just - # for a German audience. It contains many English videos and many - # German videos have English subtitles. 
- - - name: openverse - engine: openverse - categories: images - shortcut: opv - - - name: chefkoch - engine: chefkoch - shortcut: chef - # to show premium or plus results too: - # skip_premium: false - - # - name: core.ac.uk - # engine: core - # categories: science - # shortcut: cor - # # get your API key from: https://core.ac.uk/api-keys/register/ - # api_key: 'unset' - - - name: crossref - engine: crossref - shortcut: cr - timeout: 30 - disabled: true - - - name: crowdview - engine: json_engine - shortcut: cv - categories: general - paging: false - search_url: https://crowdview-next-js.onrender.com/api/search-v3?query={query} - results_query: results - url_query: link - title_query: title - content_query: snippet - disabled: true - about: - website: https://crowdview.ai/ - - - name: yep - engine: yep - shortcut: yep - categories: general - search_type: web - disabled: true - - - name: yep images - engine: yep - shortcut: yepi - categories: images - search_type: images - disabled: true - - - name: yep news - engine: yep - shortcut: yepn - categories: news - search_type: news - disabled: true - - - name: curlie - engine: xpath - shortcut: cl - categories: general - disabled: true - paging: true - lang_all: '' - search_url: https://curlie.org/search?q={query}&lang={lang}&start={pageno}&stime=92452189 - page_size: 20 - results_xpath: //div[@id="site-list-content"]/div[@class="site-item"] - url_xpath: ./div[@class="title-and-desc"]/a/@href - title_xpath: ./div[@class="title-and-desc"]/a/div - content_xpath: ./div[@class="title-and-desc"]/div[@class="site-descr"] - about: - website: https://curlie.org/ - wikidata_id: Q60715723 - use_official_api: false - require_api_key: false - results: HTML - - - name: currency - engine: currency_convert - categories: general - shortcut: cc - - - name: bahnhof - engine: json_engine - search_url: https://www.bahnhof.de/api/stations/search/{query} - url_prefix: https://www.bahnhof.de/ - url_query: slug - title_query: name - content_query: state - shortcut: bf - disabled: true - about: - website: https://www.bahn.de - wikidata_id: Q22811603 - use_official_api: false - require_api_key: false - results: JSON - language: de - - - name: deezer - engine: deezer - shortcut: dz - disabled: true - - - name: destatis - engine: destatis - shortcut: destat - disabled: true - - - name: deviantart - engine: deviantart - shortcut: da - timeout: 3.0 - - - name: ddg definitions - engine: duckduckgo_definitions - shortcut: ddd - weight: 2 - disabled: true - tests: *tests_infobox - - # cloudflare protected - # - name: digbt - # engine: digbt - # shortcut: dbt - # timeout: 6.0 - # disabled: true - - - name: docker hub - engine: docker_hub - shortcut: dh - categories: [it, packages] - - - name: erowid - engine: xpath - paging: true - first_page_num: 0 - page_size: 30 - search_url: https://www.erowid.org/search.php?q={query}&s={pageno} - url_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/@href - title_xpath: //dl[@class="results-list"]/dt[@class="result-title"]/a/text() - content_xpath: //dl[@class="results-list"]/dd[@class="result-details"] - categories: [] - shortcut: ew - disabled: true - about: - website: https://www.erowid.org/ - wikidata_id: Q1430691 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - # - name: elasticsearch - # shortcut: es - # engine: elasticsearch - # base_url: http://localhost:9200 - # username: elastic - # password: changeme - # index: my-index - # # available options: match, 
simple_query_string, term, terms, custom - # query_type: match - # # if query_type is set to custom, provide your query here - # #custom_query_json: {"query":{"match_all": {}}} - # #show_metadata: false - # disabled: true - - - name: wikidata - engine: wikidata - shortcut: wd - timeout: 3.0 - weight: 2 - # add "list" to the array to get results in the results list - display_type: ['infobox'] - tests: *tests_infobox - categories: [general] - - - name: duckduckgo - engine: duckduckgo - shortcut: ddg - - - name: duckduckgo images - engine: duckduckgo_extra - categories: [images, web] - ddg_category: images - shortcut: ddi - disabled: true - - - name: duckduckgo videos - engine: duckduckgo_extra - categories: [videos, web] - ddg_category: videos - shortcut: ddv - disabled: true - - - name: duckduckgo news - engine: duckduckgo_extra - categories: [news, web] - ddg_category: news - shortcut: ddn - disabled: true - - - name: duckduckgo weather - engine: duckduckgo_weather - shortcut: ddw - disabled: true - - - name: apple maps - engine: apple_maps - shortcut: apm - disabled: true - timeout: 5.0 - - - name: emojipedia - engine: emojipedia - timeout: 4.0 - shortcut: em - disabled: true - - - name: tineye - engine: tineye - shortcut: tin - timeout: 9.0 - disabled: true - - - name: etymonline - engine: xpath - paging: true - search_url: https://etymonline.com/search?page={pageno}&q={query} - url_xpath: //a[contains(@class, "word__name--")]/@href - title_xpath: //a[contains(@class, "word__name--")] - content_xpath: //section[contains(@class, "word__defination")] - first_page_num: 1 - shortcut: et - categories: [dictionaries] - about: - website: https://www.etymonline.com/ - wikidata_id: Q1188617 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - # - name: ebay - # engine: ebay - # shortcut: eb - # base_url: 'https://www.ebay.com' - # disabled: true - # timeout: 5 - - - name: 1x - engine: www1x - shortcut: 1x - timeout: 3.0 - disabled: true - - - name: fdroid - engine: fdroid - shortcut: fd - disabled: true - - - name: flickr - categories: images - shortcut: fl - # You can use the engine using the official stable API, but you need an API - # key, see: https://www.flickr.com/services/apps/create/ - # engine: flickr - # api_key: 'apikey' # required! 
- # Or you can use the html non-stable engine, activated by default - engine: flickr_noapi - - - name: free software directory - engine: mediawiki - shortcut: fsd - categories: [it, software wikis] - base_url: https://directory.fsf.org/ - search_type: title - timeout: 5.0 - disabled: true - about: - website: https://directory.fsf.org/ - wikidata_id: Q2470288 - - # - name: freesound - # engine: freesound - # shortcut: fnd - # disabled: true - # timeout: 15.0 - # API key required, see: https://freesound.org/docs/api/overview.html - # api_key: MyAPIkey - - - name: frinkiac - engine: frinkiac - shortcut: frk - disabled: true - - - name: fyyd - engine: fyyd - shortcut: fy - timeout: 8.0 - disabled: true - - - name: genius - engine: genius - shortcut: gen - - - name: gentoo - engine: gentoo - shortcut: ge - timeout: 10.0 - - - name: gitlab - engine: json_engine - paging: true - search_url: https://gitlab.com/api/v4/projects?search={query}&page={pageno} - url_query: web_url - title_query: name_with_namespace - content_query: description - page_size: 20 - categories: [it, repos] - shortcut: gl - timeout: 10.0 - disabled: true - about: - website: https://about.gitlab.com/ - wikidata_id: Q16639197 - official_api_documentation: https://docs.gitlab.com/ee/api/ - use_official_api: false - require_api_key: false - results: JSON - - - name: github - engine: github - shortcut: gh - - # This a Gitea service. If you would like to use a different instance, - # change codeberg.org to URL of the desired Gitea host. Or you can create a - # new engine by copying this and changing the name, shortcut and search_url. - - - name: codeberg - engine: json_engine - search_url: https://codeberg.org/api/v1/repos/search?q={query}&limit=10 - url_query: html_url - title_query: name - content_query: description - categories: [it, repos] - shortcut: cb - disabled: true - about: - website: https://codeberg.org/ - wikidata_id: - official_api_documentation: https://try.gitea.io/api/swagger - use_official_api: false - require_api_key: false - results: JSON - - - name: goodreads - engine: goodreads - shortcut: good - timeout: 4.0 - disabled: true - - - name: google - engine: google - shortcut: go - # additional_tests: - # android: *test_android - - - name: google images - engine: google_images - shortcut: goi - # additional_tests: - # android: *test_android - # dali: - # matrix: - # query: ['Dali Christ'] - # lang: ['en', 'de', 'fr', 'zh-CN'] - # result_container: - # - ['one_title_contains', 'Salvador'] - - - name: google news - engine: google_news - shortcut: gon - # additional_tests: - # android: *test_android - - - name: google videos - engine: google_videos - shortcut: gov - # additional_tests: - # android: *test_android - - - name: google scholar - engine: google_scholar - shortcut: gos - - - name: google play apps - engine: google_play - categories: [files, apps] - shortcut: gpa - play_categ: apps - disabled: true - - - name: google play movies - engine: google_play - categories: videos - shortcut: gpm - play_categ: movies - disabled: true - - - name: material icons - engine: material_icons - categories: images - shortcut: mi - disabled: true - - - name: gpodder - engine: json_engine - shortcut: gpod - timeout: 4.0 - paging: false - search_url: https://gpodder.net/search.json?q={query} - url_query: url - title_query: title - content_query: description - page_size: 19 - categories: music - disabled: true - about: - website: https://gpodder.net - wikidata_id: Q3093354 - official_api_documentation: 
https://gpoddernet.readthedocs.io/en/latest/api/ - use_official_api: false - requires_api_key: false - results: JSON - - - name: habrahabr - engine: xpath - paging: true - search_url: https://habr.com/en/search/page{pageno}/?q={query} - results_xpath: //article[contains(@class, "tm-articles-list__item")] - url_xpath: .//a[@class="tm-title__link"]/@href - title_xpath: .//a[@class="tm-title__link"] - content_xpath: .//div[contains(@class, "article-formatted-body")] - categories: it - timeout: 4.0 - disabled: true - shortcut: habr - about: - website: https://habr.com/ - wikidata_id: Q4494434 - official_api_documentation: https://habr.com/en/docs/help/api/ - use_official_api: false - require_api_key: false - results: HTML - - - name: hackernews - engine: hackernews - shortcut: hn - disabled: true - - - name: hoogle - engine: xpath - paging: true - search_url: https://hoogle.haskell.org/?hoogle={query}&start={pageno} - results_xpath: '//div[@class="result"]' - title_xpath: './/div[@class="ans"]//a' - url_xpath: './/div[@class="ans"]//a/@href' - content_xpath: './/div[@class="from"]' - page_size: 20 - categories: [it, packages] - shortcut: ho - about: - website: https://hoogle.haskell.org/ - wikidata_id: Q34010 - official_api_documentation: https://hackage.haskell.org/api - use_official_api: false - require_api_key: false - results: JSON - - - name: imdb - engine: imdb - shortcut: imdb - timeout: 6.0 - disabled: true - - - name: imgur - engine: imgur - shortcut: img - disabled: true - - - name: ina - engine: ina - shortcut: in - timeout: 6.0 - disabled: true - - - name: invidious - engine: invidious - # Instanes will be selected randomly, see https://api.invidious.io/ for - # instances that are stable (good uptime) and close to you. - base_url: - - https://invidious.io.lol - - https://invidious.fdn.fr - - https://yt.artemislena.eu - - https://invidious.tiekoetter.com - - https://invidious.flokinet.to - - https://vid.puffyan.us - - https://invidious.privacydev.net - - https://inv.tux.pizza - shortcut: iv - timeout: 3.0 - disabled: true - - - name: jisho - engine: jisho - shortcut: js - timeout: 3.0 - disabled: true - - - name: kickass - engine: kickass - base_url: - - https://kickasstorrents.to - - https://kickasstorrents.cr - - https://kickasstorrent.cr - - https://kickass.sx - - https://kat.am - shortcut: kc - timeout: 4.0 - - - name: lemmy communities - engine: lemmy - lemmy_type: Communities - shortcut: leco - - - name: lemmy users - engine: lemmy - network: lemmy communities - lemmy_type: Users - shortcut: leus - - - name: lemmy posts - engine: lemmy - network: lemmy communities - lemmy_type: Posts - shortcut: lepo - - - name: lemmy comments - engine: lemmy - network: lemmy communities - lemmy_type: Comments - shortcut: lecom - - - name: library genesis - engine: xpath - # search_url: https://libgen.is/search.php?req={query} - search_url: https://libgen.rs/search.php?req={query} - url_xpath: //a[contains(@href,"book/index.php?md5")]/@href - title_xpath: //a[contains(@href,"book/")]/text()[1] - content_xpath: //td/a[1][contains(@href,"=author")]/text() - categories: files - timeout: 7.0 - disabled: true - shortcut: lg - about: - website: https://libgen.fun/ - wikidata_id: Q22017206 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - - name: z-library - engine: zlibrary - shortcut: zlib - categories: files - timeout: 7.0 - - - name: library of congress - engine: loc - shortcut: loc - categories: images - - - name: lingva - engine: lingva - 
shortcut: lv - # set lingva instance in url, by default it will use the official instance - # url: https://lingva.thedaviddelta.com - - - name: lobste.rs - engine: xpath - search_url: https://lobste.rs/search?utf8=%E2%9C%93&q={query}&what=stories&order=relevance - results_xpath: //li[contains(@class, "story")] - url_xpath: .//a[@class="u-url"]/@href - title_xpath: .//a[@class="u-url"] - content_xpath: .//a[@class="domain"] - categories: it - shortcut: lo - timeout: 5.0 - disabled: true - about: - website: https://lobste.rs/ - wikidata_id: Q60762874 - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - - name: mastodon users - engine: mastodon - mastodon_type: accounts - base_url: https://mastodon.social - shortcut: mau - - - name: mastodon hashtags - engine: mastodon - mastodon_type: hashtags - base_url: https://mastodon.social - shortcut: mah - - # - name: matrixrooms - # engine: mrs - # # https://docs.searxng.org/dev/engines/online/mrs.html - # # base_url: https://mrs-api-host - # shortcut: mtrx - # disabled: true - - - name: mdn - shortcut: mdn - engine: json_engine - categories: [it] - paging: true - search_url: https://developer.mozilla.org/api/v1/search?q={query}&page={pageno} - results_query: documents - url_query: mdn_url - url_prefix: https://developer.mozilla.org - title_query: title - content_query: summary - about: - website: https://developer.mozilla.org - wikidata_id: Q3273508 - official_api_documentation: null - use_official_api: false - require_api_key: false - results: JSON - - - name: metacpan - engine: metacpan - shortcut: cpan - disabled: true - number_of_results: 20 - - # - name: meilisearch - # engine: meilisearch - # shortcut: mes - # enable_http: true - # base_url: http://localhost:7700 - # index: my-index - - - name: mixcloud - engine: mixcloud - shortcut: mc - - # MongoDB engine - # Required dependency: pymongo - # - name: mymongo - # engine: mongodb - # shortcut: md - # exact_match_only: false - # host: '127.0.0.1' - # port: 27017 - # enable_http: true - # results_per_page: 20 - # database: 'business' - # collection: 'reviews' # name of the db collection - # key: 'name' # key in the collection to search for - - - name: mozhi - engine: mozhi - base_url: - - https://mozhi.aryak.me - - https://translate.bus-hit.me - - https://nyc1.mz.ggtyler.dev - # mozhi_engine: google - see https://mozhi.aryak.me for supported engines - timeout: 4.0 - shortcut: mz - disabled: true - - - name: mwmbl - engine: mwmbl - # api_url: https://api.mwmbl.org - shortcut: mwm - disabled: true - - - name: npm - engine: json_engine - paging: true - first_page_num: 0 - search_url: https://api.npms.io/v2/search?q={query}&size=25&from={pageno} - results_query: results - url_query: package/links/npm - title_query: package/name - content_query: package/description - page_size: 25 - categories: [it, packages] - disabled: true - timeout: 5.0 - shortcut: npm - about: - website: https://npms.io/ - wikidata_id: Q7067518 - official_api_documentation: https://api-docs.npms.io/ - use_official_api: false - require_api_key: false - results: JSON - - - name: nyaa - engine: nyaa - shortcut: nt - disabled: true - - - name: mankier - engine: json_engine - search_url: https://www.mankier.com/api/v2/mans/?q={query} - results_query: results - url_query: url - title_query: name - content_query: description - categories: it - shortcut: man - about: - website: https://www.mankier.com/ - official_api_documentation: https://www.mankier.com/api - use_official_api: true - 
require_api_key: false - results: JSON - - - name: odysee - engine: odysee - shortcut: od - disabled: true - - - name: openairedatasets - engine: json_engine - paging: true - search_url: https://api.openaire.eu/search/datasets?format=json&page={pageno}&size=10&title={query} - results_query: response/results/result - url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ - title_query: metadata/oaf:entity/oaf:result/title/$ - content_query: metadata/oaf:entity/oaf:result/description/$ - content_html_to_text: true - categories: 'science' - shortcut: oad - timeout: 5.0 - about: - website: https://www.openaire.eu/ - wikidata_id: Q25106053 - official_api_documentation: https://api.openaire.eu/ - use_official_api: false - require_api_key: false - results: JSON - - - name: openairepublications - engine: json_engine - paging: true - search_url: https://api.openaire.eu/search/publications?format=json&page={pageno}&size=10&title={query} - results_query: response/results/result - url_query: metadata/oaf:entity/oaf:result/children/instance/webresource/url/$ - title_query: metadata/oaf:entity/oaf:result/title/$ - content_query: metadata/oaf:entity/oaf:result/description/$ - content_html_to_text: true - categories: science - shortcut: oap - timeout: 5.0 - about: - website: https://www.openaire.eu/ - wikidata_id: Q25106053 - official_api_documentation: https://api.openaire.eu/ - use_official_api: false - require_api_key: false - results: JSON - - # - name: opensemanticsearch - # engine: opensemantic - # shortcut: oss - # base_url: 'http://localhost:8983/solr/opensemanticsearch/' - - - name: openstreetmap - engine: openstreetmap - shortcut: osm - - - name: openrepos - engine: xpath - paging: true - search_url: https://openrepos.net/search/node/{query}?page={pageno} - url_xpath: //li[@class="search-result"]//h3[@class="title"]/a/@href - title_xpath: //li[@class="search-result"]//h3[@class="title"]/a - content_xpath: //li[@class="search-result"]//div[@class="search-snippet-info"]//p[@class="search-snippet"] - categories: files - timeout: 4.0 - disabled: true - shortcut: or - about: - website: https://openrepos.net/ - wikidata_id: - official_api_documentation: - use_official_api: false - require_api_key: false - results: HTML - - - name: packagist - engine: json_engine - paging: true - search_url: https://packagist.org/search.json?q={query}&page={pageno} - results_query: results - url_query: url - title_query: name - content_query: description - categories: [it, packages] - disabled: true - timeout: 5.0 - shortcut: pack - about: - website: https://packagist.org - wikidata_id: Q108311377 - official_api_documentation: https://packagist.org/apidoc - use_official_api: true - require_api_key: false - results: JSON - - - name: pdbe - engine: pdbe - shortcut: pdb - # Hide obsolete PDB entries. 
Default is not to hide obsolete structures - # hide_obsolete: false - - - name: photon - engine: photon - shortcut: ph - - - name: pinterest - engine: pinterest - shortcut: pin - - - name: piped - engine: piped - shortcut: ppd - categories: videos - piped_filter: videos - timeout: 3.0 - - # URL to use as link and for embeds - frontend_url: https://srv.piped.video - # Instance will be selected randomly, for more see https://piped-instances.kavin.rocks/ - backend_url: - - https://pipedapi.kavin.rocks - - https://pipedapi-libre.kavin.rocks - - https://pipedapi.adminforge.de - - - name: piped.music - engine: piped - network: piped - shortcut: ppdm - categories: music - piped_filter: music_songs - timeout: 3.0 - - - name: piratebay - engine: piratebay - shortcut: tpb - # You may need to change this URL to a proxy if piratebay is blocked in your - # country - url: https://thepiratebay.org/ - timeout: 3.0 - - - name: podcastindex - engine: podcastindex - shortcut: podcast - - # Required dependency: psychopg2 - # - name: postgresql - # engine: postgresql - # database: postgres - # username: postgres - # password: postgres - # limit: 10 - # query_str: 'SELECT * from my_table WHERE my_column = %(query)s' - # shortcut : psql - - - name: presearch - engine: presearch - search_type: search - categories: [general, web] - shortcut: ps - timeout: 4.0 - disabled: true - - - name: presearch images - engine: presearch - network: presearch - search_type: images - categories: [images, web] - timeout: 4.0 - shortcut: psimg - disabled: true - - - name: presearch videos - engine: presearch - network: presearch - search_type: videos - categories: [general, web] - timeout: 4.0 - shortcut: psvid - disabled: true - - - name: presearch news - engine: presearch - network: presearch - search_type: news - categories: [news, web] - timeout: 4.0 - shortcut: psnews - disabled: true - - - name: pub.dev - engine: xpath - shortcut: pd - search_url: https://pub.dev/packages?q={query}&page={pageno} - paging: true - results_xpath: //div[contains(@class,"packages-item")] - url_xpath: ./div/h3/a/@href - title_xpath: ./div/h3/a - content_xpath: ./div/div/div[contains(@class,"packages-description")]/span - categories: [packages, it] - timeout: 3.0 - disabled: true - first_page_num: 1 - about: - website: https://pub.dev/ - official_api_documentation: https://pub.dev/help/api - use_official_api: false - require_api_key: false - results: HTML - - - name: pubmed - engine: pubmed - shortcut: pub - timeout: 3.0 - - - name: pypi - shortcut: pypi - engine: xpath - paging: true - search_url: https://pypi.org/search/?q={query}&page={pageno} - results_xpath: /html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"] - url_xpath: ./@href - title_xpath: ./h3/span[@class="package-snippet__name"] - content_xpath: ./p - suggestion_xpath: /html/body/main/div/div/div/form/div/div[@class="callout-block"]/p/span/a[@class="link"] - first_page_num: 1 - categories: [it, packages] - about: - website: https://pypi.org - wikidata_id: Q2984686 - official_api_documentation: https://warehouse.readthedocs.io/api-reference/index.html - use_official_api: false - require_api_key: false - results: HTML - - - name: qwant - qwant_categ: web - engine: qwant - shortcut: qw - categories: [general, web] - additional_tests: - rosebud: *test_rosebud - - - name: qwant news - qwant_categ: news - engine: qwant - shortcut: qwn - categories: news - network: qwant - - - name: qwant images - qwant_categ: images - engine: qwant - shortcut: qwi - categories: [images, web] 
- network: qwant - - - name: qwant videos - qwant_categ: videos - engine: qwant - shortcut: qwv - categories: [videos, web] - network: qwant - - # - name: library - # engine: recoll - # shortcut: lib - # base_url: 'https://recoll.example.org/' - # search_dir: '' - # mount_prefix: /export - # dl_prefix: 'https://download.example.org' - # timeout: 30.0 - # categories: files - # disabled: true - - # - name: recoll library reference - # engine: recoll - # base_url: 'https://recoll.example.org/' - # search_dir: reference - # mount_prefix: /export - # dl_prefix: 'https://download.example.org' - # shortcut: libr - # timeout: 30.0 - # categories: files - # disabled: true - - - name: radio browser - engine: radio_browser - shortcut: rb - - - name: reddit - engine: reddit - shortcut: re - page_size: 25 - - - name: rottentomatoes - engine: rottentomatoes - shortcut: rt - disabled: true - - # Required dependency: redis - # - name: myredis - # shortcut : rds - # engine: redis_server - # exact_match_only: false - # host: '127.0.0.1' - # port: 6379 - # enable_http: true - # password: '' - # db: 0 - - # tmp suspended: bad certificate - # - name: scanr structures - # shortcut: scs - # engine: scanr_structures - # disabled: true - - - name: sepiasearch - engine: sepiasearch - shortcut: sep - - - name: soundcloud - engine: soundcloud - shortcut: sc - - - name: stackoverflow - engine: stackexchange - shortcut: st - api_site: 'stackoverflow' - categories: [it, q&a] - - - name: askubuntu - engine: stackexchange - shortcut: ubuntu - api_site: 'askubuntu' - categories: [it, q&a] - - - name: internetarchivescholar - engine: internet_archive_scholar - shortcut: ias - timeout: 5.0 - - - name: superuser - engine: stackexchange - shortcut: su - api_site: 'superuser' - categories: [it, q&a] - - - name: searchcode code - engine: searchcode_code - shortcut: scc - disabled: true - - # - name: searx - # engine: searx_engine - # shortcut: se - # instance_urls : - # - http://127.0.0.1:8888/ - # - ... 
- # disabled: true - - - name: semantic scholar - engine: semantic_scholar - disabled: true - shortcut: se - - # Spotify needs API credentials - # - name: spotify - # engine: spotify - # shortcut: stf - # api_client_id: ******* - # api_client_secret: ******* - - # - name: solr - # engine: solr - # shortcut: slr - # base_url: http://localhost:8983 - # collection: collection_name - # sort: '' # sorting: asc or desc - # field_list: '' # comma separated list of field names to display on the UI - # default_fields: '' # default field to query - # query_fields: '' # query fields - # enable_http: true - - # - name: springer nature - # engine: springer - # # get your API key from: https://dev.springernature.com/signup - # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" - # api_key: 'unset' - # shortcut: springer - # timeout: 15.0 - - - name: startpage - engine: startpage - shortcut: sp - timeout: 6.0 - disabled: true - additional_tests: - rosebud: *test_rosebud - - - name: tokyotoshokan - engine: tokyotoshokan - shortcut: tt - timeout: 6.0 - disabled: true - - - name: solidtorrents - engine: solidtorrents - shortcut: solid - timeout: 4.0 - base_url: - - https://solidtorrents.to - - https://bitsearch.to - - # For this demo of the sqlite engine download: - # https://liste.mediathekview.de/filmliste-v2.db.bz2 - # and unpack into searx/data/filmliste-v2.db - # Query to test: "!demo concert" - # - # - name: demo - # engine: sqlite - # shortcut: demo - # categories: general - # result_template: default.html - # database: searx/data/filmliste-v2.db - # query_str: >- - # SELECT title || ' (' || time(duration, 'unixepoch') || ')' AS title, - # COALESCE( NULLIF(url_video_hd,''), NULLIF(url_video_sd,''), url_video) AS url, - # description AS content - # FROM film - # WHERE title LIKE :wildcard OR description LIKE :wildcard - # ORDER BY duration DESC - - - name: tagesschau - engine: tagesschau - # when set to false, display URLs from Tagesschau, and not the actual source - # (e.g. NDR, WDR, SWR, HR, ...) - use_source_url: true - shortcut: ts - disabled: true - - - name: tmdb - engine: xpath - paging: true - categories: movies - search_url: https://www.themoviedb.org/search?page={pageno}&query={query} - results_xpath: //div[contains(@class,"movie") or contains(@class,"tv")]//div[contains(@class,"card")] - url_xpath: .//div[contains(@class,"poster")]/a/@href - thumbnail_xpath: .//img/@src - title_xpath: .//div[contains(@class,"title")]//h2 - content_xpath: .//div[contains(@class,"overview")] - shortcut: tm - disabled: true - - # Requires Tor - - name: torch - engine: xpath - paging: true - search_url: http://xmh57jrknzkhv6y3ls3ubitzfqnkrwxhopf5aygthi7d6rplyvk3noyd.onion/cgi-bin/omega/omega?P={query}&DEFAULTOP=and - results_xpath: //table//tr - url_xpath: ./td[2]/a - title_xpath: ./td[2]/b - content_xpath: ./td[2]/small - categories: onions - enable_http: true - shortcut: tch - - # torznab engine lets you query any torznab compatible indexer. Using this - # engine in combination with Jackett opens the possibility to query a lot of - # public and private indexers directly from SearXNG. 
More details at: - # https://docs.searxng.org/dev/engines/online/torznab.html - # - # - name: Torznab EZTV - # engine: torznab - # shortcut: eztv - # base_url: http://localhost:9117/api/v2.0/indexers/eztv/results/torznab - # enable_http: true # if using localhost - # api_key: xxxxxxxxxxxxxxx - # show_magnet_links: true - # show_torrent_files: false - # # https://github.com/Jackett/Jackett/wiki/Jackett-Categories - # torznab_categories: # optional - # - 2000 - # - 5000 - - # tmp suspended - too slow, too many errors - # - name: urbandictionary - # engine : xpath - # search_url : https://www.urbandictionary.com/define.php?term={query} - # url_xpath : //*[@class="word"]/@href - # title_xpath : //*[@class="def-header"] - # content_xpath: //*[@class="meaning"] - # shortcut: ud - - - name: unsplash - engine: unsplash - shortcut: us - - - name: yandex music - engine: yandex_music - shortcut: ydm - disabled: true - # https://yandex.com/support/music/access.html - inactive: true - - - name: yahoo - engine: yahoo - shortcut: yh - disabled: true - - - name: yahoo news - engine: yahoo_news - shortcut: yhn - - - name: youtube - shortcut: yt - # You can use the engine using the official stable API, but you need an API - # key See: https://console.developers.google.com/project - # - # engine: youtube_api - # api_key: 'apikey' # required! - # - # Or you can use the html non-stable engine, activated by default - engine: youtube_noapi - - - name: dailymotion - engine: dailymotion - shortcut: dm - - - name: vimeo - engine: vimeo - shortcut: vm - - - name: wiby - engine: json_engine - paging: true - search_url: https://wiby.me/json/?q={query}&p={pageno} - url_query: URL - title_query: Title - content_query: Snippet - categories: [general, web] - shortcut: wib - disabled: true - about: - website: https://wiby.me/ - - - name: alexandria - engine: json_engine - shortcut: alx - categories: general - paging: true - search_url: https://api.alexandria.org/?a=1&q={query}&p={pageno} - results_query: results - title_query: title - url_query: url - content_query: snippet - timeout: 1.5 - disabled: true - about: - website: https://alexandria.org/ - official_api_documentation: https://github.com/alexandria-org/alexandria-api/raw/master/README.md - use_official_api: true - require_api_key: false - results: JSON - - - name: wikibooks - engine: mediawiki - weight: 0.5 - shortcut: wb - categories: [general, wikimedia] - base_url: 'https://{language}.wikibooks.org/' - search_type: text - disabled: true - about: - website: https://www.wikibooks.org/ - wikidata_id: Q367 - - - name: wikinews - engine: mediawiki - shortcut: wn - categories: [news, wikimedia] - base_url: 'https://{language}.wikinews.org/' - search_type: text - srsort: create_timestamp_desc - about: - website: https://www.wikinews.org/ - wikidata_id: Q964 - - - name: wikiquote - engine: mediawiki - weight: 0.5 - shortcut: wq - categories: [general, wikimedia] - base_url: 'https://{language}.wikiquote.org/' - search_type: text - disabled: true - additional_tests: - rosebud: *test_rosebud - about: - website: https://www.wikiquote.org/ - wikidata_id: Q369 - - - name: wikisource - engine: mediawiki - weight: 0.5 - shortcut: ws - categories: [general, wikimedia] - base_url: 'https://{language}.wikisource.org/' - search_type: text - disabled: true - about: - website: https://www.wikisource.org/ - wikidata_id: Q263 - - - name: wikispecies - engine: mediawiki - shortcut: wsp - categories: [general, science, wikimedia] - base_url: 'https://species.wikimedia.org/' - 
search_type: text - disabled: true - about: - website: https://species.wikimedia.org/ - wikidata_id: Q13679 - - - name: wiktionary - engine: mediawiki - shortcut: wt - categories: [dictionaries, wikimedia] - base_url: 'https://{language}.wiktionary.org/' - search_type: text - about: - website: https://www.wiktionary.org/ - wikidata_id: Q151 - - - name: wikiversity - engine: mediawiki - weight: 0.5 - shortcut: wv - categories: [general, wikimedia] - base_url: 'https://{language}.wikiversity.org/' - search_type: text - disabled: true - about: - website: https://www.wikiversity.org/ - wikidata_id: Q370 - - - name: wikivoyage - engine: mediawiki - weight: 0.5 - shortcut: wy - categories: [general, wikimedia] - base_url: 'https://{language}.wikivoyage.org/' - search_type: text - disabled: true - about: - website: https://www.wikivoyage.org/ - wikidata_id: Q373 - - - name: wikicommons.images - engine: wikicommons - shortcut: wc - categories: images - number_of_results: 10 - - name: wolframalpha - shortcut: wa - # You can use the engine using the official stable API, but you need an API - # key. See: https://products.wolframalpha.com/api/ - # - # engine: wolframalpha_api - # api_key: '' - # - # Or you can use the html non-stable engine, activated by default - engine: wolframalpha_noapi - timeout: 6.0 - categories: general disabled: false - - - name: dictzone - engine: dictzone - shortcut: dc - - - name: mymemory translated - engine: translated - shortcut: tl - timeout: 5.0 - # You can use without an API key, but you are limited to 1000 words/day - # See: https://mymemory.translated.net/doc/usagelimits.php - # api_key: '' - - # Required dependency: mysql-connector-python - # - name: mysql - # engine: mysql_server - # database: mydatabase - # username: user - # password: pass - # limit: 10 - # query_str: 'SELECT * from mytable WHERE fieldname=%(query)s' - # shortcut: mysql - - - name: 1337x - engine: 1337x - shortcut: 1337x - disabled: true - - - name: duden - engine: duden - shortcut: du - disabled: true - - - name: seznam - shortcut: szn - engine: seznam - disabled: true - - # - name: deepl - # engine: deepl - # shortcut: dpl - # # You can use the engine using the official stable API, but you need an API key - # # See: https://www.deepl.com/pro-api?cta=header-pro-api - # api_key: '' # required! 
- # timeout: 5.0 - # disabled: true - - - name: mojeek - shortcut: mjk - engine: xpath - paging: true - categories: [general, web] - search_url: https://www.mojeek.com/search?q={query}&s={pageno}&lang={lang}&lb={lang} - results_xpath: //ul[@class="results-standard"]/li/a[@class="ob"] - url_xpath: ./@href - title_xpath: ../h2/a - content_xpath: ..//p[@class="s"] - suggestion_xpath: //div[@class="top-info"]/p[@class="top-info spell"]/em/a - first_page_num: 0 - page_size: 10 - max_page: 100 - disabled: true - about: - website: https://www.mojeek.com/ - wikidata_id: Q60747299 - official_api_documentation: https://www.mojeek.com/services/api.html/ - use_official_api: false - require_api_key: false - results: HTML - - - name: moviepilot - engine: moviepilot - shortcut: mp - disabled: true - - - name: naver - shortcut: nvr - categories: [general, web] - engine: xpath - paging: true - search_url: https://search.naver.com/search.naver?where=webkr&sm=osp_hty&ie=UTF-8&query={query}&start={pageno} - url_xpath: //a[@class="link_tit"]/@href - title_xpath: //a[@class="link_tit"] - content_xpath: //a[@class="total_dsc"]/div - first_page_num: 1 - page_size: 10 - disabled: true - about: - website: https://www.naver.com/ - wikidata_id: Q485639 - official_api_documentation: https://developers.naver.com/docs/nmt/examples/ - use_official_api: false - require_api_key: false - results: HTML - language: ko - - - name: rubygems - shortcut: rbg - engine: xpath - paging: true - search_url: https://rubygems.org/search?page={pageno}&query={query} - results_xpath: /html/body/main/div/a[@class="gems__gem"] - url_xpath: ./@href - title_xpath: ./span/h2 - content_xpath: ./span/p - suggestion_xpath: /html/body/main/div/div[@class="search__suggestions"]/p/a - first_page_num: 1 - categories: [it, packages] - disabled: true - about: - website: https://rubygems.org/ - wikidata_id: Q1853420 - official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ - use_official_api: false - require_api_key: false - results: HTML - - - name: peertube - engine: peertube - shortcut: ptb - paging: true - # alternatives see: https://instances.joinpeertube.org/instances - # base_url: https://tube.4aem.com - categories: videos - disabled: true - timeout: 6.0 - - - name: mediathekviewweb - engine: mediathekviewweb - shortcut: mvw - disabled: true - - - name: yacy - engine: yacy - categories: general - search_type: text - base_url: https://yacy.searchlab.eu - shortcut: ya - disabled: true - # required if you aren't using HTTPS for your local yacy instance - # https://docs.searxng.org/dev/engines/online/yacy.html - # enable_http: true - # timeout: 3.0 - # search_mode: 'global' - - - name: yacy images - engine: yacy - categories: images - search_type: image - base_url: https://yacy.searchlab.eu - shortcut: yai - disabled: true - - - name: rumble - engine: rumble - shortcut: ru - base_url: https://rumble.com/ - paging: true - categories: videos - disabled: true - - - name: livespace - engine: livespace - shortcut: ls - categories: videos - disabled: true - timeout: 5.0 - - - name: wordnik - engine: wordnik - shortcut: def - base_url: https://www.wordnik.com/ - categories: [dictionaries] - timeout: 5.0 - - - name: woxikon.de synonyme - engine: xpath - shortcut: woxi - categories: [dictionaries] - timeout: 5.0 - disabled: true - search_url: https://synonyme.woxikon.de/synonyme/{query}.php - url_xpath: //div[@class="upper-synonyms"]/a/@href - content_xpath: //div[@class="synonyms-list-group"] - title_xpath: //div[@class="upper-synonyms"]/a - 
no_result_for_http_status: [404] - about: - website: https://www.woxikon.de/ - wikidata_id: # No Wikidata ID - use_official_api: false - require_api_key: false - results: HTML - language: de - - - name: seekr news - engine: seekr - shortcut: senews - categories: news - seekr_category: news - disabled: true - - - name: seekr images - engine: seekr - network: seekr news - shortcut: seimg - categories: images - seekr_category: images - disabled: true - - - name: seekr videos - engine: seekr - network: seekr news - shortcut: sevid - categories: videos - seekr_category: videos - disabled: true - - - name: sjp.pwn - engine: sjp - shortcut: sjp - base_url: https://sjp.pwn.pl/ - timeout: 5.0 - disabled: true - - - name: stract - engine: stract - shortcut: str - disabled: true - - - name: svgrepo - engine: svgrepo - shortcut: svg - timeout: 10.0 - disabled: true - - - name: tootfinder - engine: tootfinder - shortcut: toot - - - name: wallhaven - engine: wallhaven - # api_key: abcdefghijklmnopqrstuvwxyz - shortcut: wh - - # wikimini: online encyclopedia for children - # The fulltext and title parameter is necessary for Wikimini because - # sometimes it will not show the results and redirect instead - - name: wikimini - engine: xpath - shortcut: wkmn - search_url: https://fr.wikimini.org/w/index.php?search={query}&title=Sp%C3%A9cial%3ASearch&fulltext=Search - url_xpath: //li/div[@class="mw-search-result-heading"]/a/@href - title_xpath: //li//div[@class="mw-search-result-heading"]/a - content_xpath: //li/div[@class="searchresult"] - categories: general - disabled: true - about: - website: https://wikimini.org/ - wikidata_id: Q3568032 - use_official_api: false - require_api_key: false - results: HTML - language: fr - - - name: wttr.in - engine: wttr - shortcut: wttr - timeout: 9.0 - - - name: yummly - engine: yummly - shortcut: yum - disabled: true - - - name: brave - engine: brave - shortcut: br - time_range_support: true - paging: true - categories: [general, web] - brave_category: search - # brave_spellcheck: true - - - name: brave.images - engine: brave - network: brave - shortcut: brimg - categories: [images, web] - brave_category: images - - - name: brave.videos - engine: brave - network: brave - shortcut: brvid - categories: [videos, web] - brave_category: videos - - - name: brave.news - engine: brave - network: brave - shortcut: brnews - categories: news - brave_category: news - - # - name: brave.goggles - # engine: brave - # network: brave - # shortcut: brgog - # time_range_support: true - # paging: true - # categories: [general, web] - # brave_category: goggles - # Goggles: # required! 
This should be a URL ending in .goggle - - - name: lib.rs - shortcut: lrs - engine: xpath - search_url: https://lib.rs/search?q={query} - results_xpath: /html/body/main/div/ol/li/a - url_xpath: ./@href - title_xpath: ./div[@class="h"]/h4 - content_xpath: ./div[@class="h"]/p - categories: [it, packages] - disabled: true - about: - website: https://lib.rs - wikidata_id: Q113486010 - use_official_api: false - require_api_key: false - results: HTML - - - name: sourcehut - shortcut: srht - engine: xpath - paging: true - search_url: https://sr.ht/projects?page={pageno}&search={query} - results_xpath: (//div[@class="event-list"])[1]/div[@class="event"] - url_xpath: ./h4/a[2]/@href - title_xpath: ./h4/a[2] - content_xpath: ./p - first_page_num: 1 - categories: [it, repos] - disabled: true - about: - website: https://sr.ht - wikidata_id: Q78514485 - official_api_documentation: https://man.sr.ht/ - use_official_api: false - require_api_key: false - results: HTML - - - name: goo - shortcut: goo - engine: xpath - paging: true - search_url: https://search.goo.ne.jp/web.jsp?MT={query}&FR={pageno}0 - url_xpath: //div[@class="result"]/p[@class='title fsL1']/a/@href - title_xpath: //div[@class="result"]/p[@class='title fsL1']/a - content_xpath: //p[contains(@class,'url fsM')]/following-sibling::p - first_page_num: 0 - categories: [general, web] - disabled: true - timeout: 4.0 - about: - website: https://search.goo.ne.jp - wikidata_id: Q249044 - use_official_api: false - require_api_key: false - results: HTML - language: ja - - - name: bt4g - engine: bt4g - shortcut: bt4g - - - name: pkg.go.dev - engine: xpath - shortcut: pgo - search_url: https://pkg.go.dev/search?limit=100&m=package&q={query} - results_xpath: /html/body/main/div[contains(@class,"SearchResults")]/div[not(@class)]/div[@class="SearchSnippet"] - url_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a/@href - title_xpath: ./div[@class="SearchSnippet-headerContainer"]/h2/a - content_xpath: ./p[@class="SearchSnippet-synopsis"] - categories: [packages, it] - timeout: 3.0 - disabled: true - about: - website: https://pkg.go.dev/ - use_official_api: false - require_api_key: false - results: HTML - -# Doku engine lets you access to any Doku wiki instance: -# A public one or a privete/corporate one. -# - name: ubuntuwiki -# engine: doku -# shortcut: uw -# base_url: 'https://doc.ubuntu-fr.org' - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: git grep -# engine: command -# command: ['git', 'grep', '{{QUERY}}'] -# shortcut: gg -# tokens: [] -# disabled: true -# delimiter: -# chars: ':' -# keys: ['filepath', 'code'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: locate -# engine: command -# command: ['locate', '{{QUERY}}'] -# shortcut: loc -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. 
-# - name: find -# engine: command -# command: ['find', '.', '-name', '{{QUERY}}'] -# query_type: path -# shortcut: fnd -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: pattern search in files -# engine: command -# command: ['fgrep', '{{QUERY}}'] -# shortcut: fgr -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -# Be careful when enabling this engine if you are -# running a public instance. Do not expose any sensitive -# information. You can restrict access by configuring a list -# of access tokens under tokens. -# - name: regex search in files -# engine: command -# command: ['grep', '{{QUERY}}'] -# shortcut: gr -# tokens: [] -# disabled: true -# delimiter: -# chars: ' ' -# keys: ['line'] - -doi_resolvers: - oadoi.org: 'https://oadoi.org/' - doi.org: 'https://doi.org/' - doai.io: 'https://dissem.in/' - sci-hub.se: 'https://sci-hub.se/' - sci-hub.st: 'https://sci-hub.st/' - sci-hub.ru: 'https://sci-hub.ru/' - -default_doi_resolver: 'oadoi.org' diff --git a/src/agents/academicSearchAgent.ts b/src/agents/academicSearchAgent.ts index d797119..a72e3a2 100644 --- a/src/agents/academicSearchAgent.ts +++ b/src/agents/academicSearchAgent.ts @@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory'; import eventEmitter from 'events'; import computeSimilarity from '../utils/computeSimilarity'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const basicAcademicSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. @@ -66,7 +67,7 @@ const basicAcademicSearchResponsePrompt = ` const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/agents/redditSearchAgent.ts b/src/agents/redditSearchAgent.ts index 578e2bb..9c2c443 100644 --- a/src/agents/redditSearchAgent.ts +++ b/src/agents/redditSearchAgent.ts @@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory'; import eventEmitter from 'events'; import computeSimilarity from '../utils/computeSimilarity'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const basicRedditSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. 
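// The agents' shared handleStream helper is retyped from AsyncGenerator to IterableReadableStream
// throughout this diff (academicSearchAgent above; the remaining agents below). IterableReadableStream
// is what LangChain's .streamEvents() returns, so only the parameter type narrows while the for-await
// consumption stays the same. A minimal sketch of the retyped handler, with an assumed simplified
// event shape and assumed event/emitter names (the real handlers forward more event kinds):

import { IterableReadableStream } from '@langchain/core/utils/stream';
import EventEmitter from 'events';

// Assumed, simplified shape of the events yielded by streamEvents().
type AgentStreamEvent = { event: string; name: string; data: { chunk?: string } };

const handleStream = async (
  stream: IterableReadableStream<AgentStreamEvent>,
  emitter: EventEmitter,
) => {
  for await (const event of stream) {
    // Forward streamed response chunks and signal completion to the websocket emitter.
    if (event.event === 'on_chain_stream' && event.name === 'FinalResponseGenerator') {
      emitter.emit('data', JSON.stringify({ type: 'response', data: event.data.chunk }));
    }
    if (event.event === 'on_chain_end' && event.name === 'FinalResponseGenerator') {
      emitter.emit('end');
    }
  }
};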
@@ -66,7 +67,7 @@ const basicRedditSearchResponsePrompt = `
 const strParser = new StringOutputParser();
 
 const handleStream = async (
-  stream: AsyncGenerator,
+  stream: IterableReadableStream,
   emitter: eventEmitter,
 ) => {
   for await (const event of stream) {
diff --git a/src/agents/webSearchAgent.ts b/src/agents/webSearchAgent.ts
index 9818d01..77ec181 100644
--- a/src/agents/webSearchAgent.ts
+++ b/src/agents/webSearchAgent.ts
@@ -19,34 +19,83 @@ import formatChatHistoryAsString from '../utils/formatHistory';
 import eventEmitter from 'events';
 import computeSimilarity from '../utils/computeSimilarity';
 import logger from '../utils/logger';
+import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';
+import { getDocumentsFromLinks } from '../lib/linkDocument';
+import LineOutputParser from '../lib/outputParsers/lineOutputParser';
+import { IterableReadableStream } from '@langchain/core/utils/stream';
+import { ChatOpenAI } from '@langchain/openai';
 
 const basicSearchRetrieverPrompt = `
-You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information.
-If it is a writing task or a simple hi, hello rather than a question, you need to return \`not_needed\` as the response.
+You are an AI question rephraser. You will be given a conversation and a follow-up question; you will have to rephrase the follow-up question so it is a standalone question that can be used by another LLM to search the web for information to answer it.
+If it is a simple writing task or a greeting (unless the greeting contains a question after it) like Hi, Hello, How are you, etc., then you need to return \`not_needed\` as the response (this is because the LLM won't need to search the web for information on this topic).
+If the user asks a question about some URL or wants you to summarize a PDF or a webpage (via URL), you need to return the links inside the \`links\` XML block and the question inside the \`question\` XML block. If the user wants you to summarize the webpage or the PDF, you need to return \`summarize\` inside the \`question\` XML block in place of a question and the link to summarize in the \`links\` XML block.
+You must always return the rephrased question inside the \`question\` XML block; if there are no links in the follow-up question, then don't insert a \`links\` XML block in your response.
 
-Example:
-1. Follow up question: What is the capital of France?
-Rephrased: Capital of france
+There are several examples attached for your reference inside the below \`examples\` XML block
 
-2. Follow up question: What is the population of New York City?
-Rephrased: Population of New York City
+<examples>
+1. Follow up question: What is the capital of France
+Rephrased question:\`
+<question>
+Capital of france
+</question>
+\`
+
+2. Hi, how are you?
+Rephrased question:\`
+<question>
+not_needed
+</question>
+\`
 
 3. Follow up question: What is Docker?
-Rephrased: What is Docker
+Rephrased question: \`
+<question>
+What is Docker
+</question>
+\`
 
-Conversation:
+4. Follow up question: Can you tell me what is X from https://example.com
+Rephrased question: \`
+<question>
+Can you tell me what is X?
+</question>
+
+<links>
+https://example.com
+</links>
+\`
+
+5. Follow up question: Summarize the content from https://example.com
+Rephrased question: \`
+<question>
+summarize
+</question>
+
+<links>
+https://example.com
+</links>
+\`
+</examples>
+
+Anything below is the part of the actual conversation and you need to use the conversation and the follow-up question to rephrase the follow-up question as a standalone question based on the guidelines shared above.
+
+<conversation>
 {chat_history}
+</conversation>
+
 Follow up question: {query}
 Rephrased question:
 `;
 
 const basicWebSearchResponsePrompt = `
-    You are Perplexica, an AI model who is expert at searching the web and answering user's queries.
+    You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are also an expert at summarizing web pages or documents and searching for content in them.
 
     Generate a response that is informative and relevant to the user's query based on provided context (the context consits of search results containing a brief description of the content of that page).
     You must use this context to answer the user's query in the best way possible. Use an unbaised and journalistic tone in your response. Do not repeat the text.
     You must not tell the user to open any link or visit any website to get the answer. You must provide the answer in the response itself. If the user asks for links you can provide them.
+    If the query contains some links and the user asks to answer from those links, you will be provided the entire content of the page inside the \`context\` XML block. You can then use this content to answer the user's query.
+    If the user asks to summarize content from some links, you will be provided the entire content of the page inside the \`context\` XML block. You can then use this content to summarize the text. The content provided inside the \`context\` block will already be summarized by another model, so you just need to use that content to answer the user's query.
     Your responses should be medium to long in length be informative and relevant to the user's query. You can use markdowns to format your response. You should use bullet points to list the information. Make sure the answer is not short and is informative.
     You have to cite the answer using [number] notation. You must cite the sentences with their relevent context number. You must cite each and every part of the answer so the user can know where the information is coming from.
     Place these citations at the end of that particular sentence. You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
@@ -59,14 +108,14 @@ const basicWebSearchResponsePrompt = `
    <context>
    {context}
    </context>
 
-    If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
+    If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'. You do not need to do this for summarization tasks.
    Anything between the \`context\` is retrieved from a search engine and is not a part of the conversation with the user.
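// For orientation while reading the retriever changes further down: the `question` and `links`
// XML blocks requested by the rephraser prompt above are pulled out of the raw LLM reply with the
// two line parsers this diff adds under src/lib/outputParsers. A rough usage sketch (import paths
// as they appear in src/agents; rawReply is a made-up model response, not code from the diff):

import LineOutputParser from '../lib/outputParsers/lineOutputParser';
import LineListOutputParser from '../lib/outputParsers/listLineOutputParser';

const questionParser = new LineOutputParser({ key: 'question' });
const linksParser = new LineListOutputParser({ key: 'links' });

const rawReply = [
  '<question>',
  'Can you tell me what is X?',
  '</question>',
  '',
  '<links>',
  'https://example.com',
  '</links>',
].join('\n');

const demo = async () => {
  const question = await questionParser.parse(rawReply); // 'Can you tell me what is X?'
  const links = await linksParser.parse(rawReply); // ['https://example.com']

  // The retriever chain then branches: 'not_needed' skips searching entirely, a non-empty
  // links array triggers getDocumentsFromLinks plus per-page summarization, and otherwise
  // the rephrased question is sent to SearXNG as before.
  return { question, links };
};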
Today's date is ${new Date().toISOString()} `; const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { @@ -103,32 +152,120 @@ type BasicChainInput = { }; const createBasicWebSearchRetrieverChain = (llm: BaseChatModel) => { + (llm as unknown as ChatOpenAI).temperature = 0; + return RunnableSequence.from([ PromptTemplate.fromTemplate(basicSearchRetrieverPrompt), llm, strParser, RunnableLambda.from(async (input: string) => { - if (input === 'not_needed') { + const linksOutputParser = new LineListOutputParser({ + key: 'links', + }); + + const questionOutputParser = new LineOutputParser({ + key: 'question', + }); + + const links = await linksOutputParser.parse(input); + let question = await questionOutputParser.parse(input); + + if (question === 'not_needed') { return { query: '', docs: [] }; } - const res = await searchSearxng(input, { - language: 'en', - }); + if (links.length > 0) { + if (question.length === 0) { + question = 'summarize'; + } - const documents = res.results.map( - (result) => - new Document({ - pageContent: result.content, - metadata: { - title: result.title, - url: result.url, - ...(result.img_src && { img_src: result.img_src }), - }, + let docs = []; + + const linkDocs = await getDocumentsFromLinks({ links }); + + const docGroups: Document[] = []; + + linkDocs.map((doc) => { + const URLDocExists = docGroups.find( + (d) => + d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10, + ); + + if (!URLDocExists) { + docGroups.push({ + ...doc, + metadata: { + ...doc.metadata, + totalDocs: 1, + }, + }); + } + + const docIndex = docGroups.findIndex( + (d) => + d.metadata.url === doc.metadata.url && d.metadata.totalDocs < 10, + ); + + if (docIndex !== -1) { + docGroups[docIndex].pageContent = + docGroups[docIndex].pageContent + `\n\n` + doc.pageContent; + docGroups[docIndex].metadata.totalDocs += 1; + } + }); + + await Promise.all( + docGroups.map(async (doc) => { + const res = await llm.invoke(` + You are a text summarizer. You need to summarize the text provided inside the \`text\` XML block. + You need to summarize the text into 1 or 2 sentences capturing the main idea of the text. + You need to make sure that you don't miss any point while summarizing the text. + You will also be given a \`query\` XML block which will contain the query of the user. Try to answer the query in the summary from the text provided. + If the query says Summarize then you just need to summarize the text without answering the query. + Only return the summarized text without any other messages, text or XML block. + + + ${question} + + + + ${doc.pageContent} + + + Make sure to answer the query in the summary. 
+ `); + + const document = new Document({ + pageContent: res.content as string, + metadata: { + title: doc.metadata.title, + url: doc.metadata.url, + }, + }); + + docs.push(document); }), - ); + ); - return { query: input, docs: documents }; + return { query: question, docs: docs }; + } else { + const res = await searchSearxng(question, { + language: 'en', + }); + + const documents = res.results.map( + (result) => + new Document({ + pageContent: result.content, + metadata: { + title: result.title, + url: result.url, + ...(result.img_src && { img_src: result.img_src }), + }, + }), + ); + + return { query: question, docs: documents }; + } }), ]); }; @@ -156,6 +293,10 @@ const createBasicWebSearchAnsweringChain = ( return docs; } + if (query.toLocaleLowerCase() === 'summarize') { + return docs; + } + const docsWithContent = docs.filter( (doc) => doc.pageContent && doc.pageContent.length > 0, ); @@ -175,7 +316,7 @@ const createBasicWebSearchAnsweringChain = ( }); const sortedDocs = similarity - .filter((sim) => sim.similarity > 0.5) + .filter((sim) => sim.similarity > 0.3) .sort((a, b) => b.similarity - a.similarity) .slice(0, 15) .map((sim) => docsWithContent[sim.index]); diff --git a/src/agents/wolframAlphaSearchAgent.ts b/src/agents/wolframAlphaSearchAgent.ts index b80fcf3..1de8d0f 100644 --- a/src/agents/wolframAlphaSearchAgent.ts +++ b/src/agents/wolframAlphaSearchAgent.ts @@ -18,6 +18,7 @@ import type { Embeddings } from '@langchain/core/embeddings'; import formatChatHistoryAsString from '../utils/formatHistory'; import eventEmitter from 'events'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const basicWolframAlphaSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. @@ -65,7 +66,7 @@ const basicWolframAlphaSearchResponsePrompt = ` const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/agents/writingAssistant.ts b/src/agents/writingAssistant.ts index 7c2cb49..7e275df 100644 --- a/src/agents/writingAssistant.ts +++ b/src/agents/writingAssistant.ts @@ -10,6 +10,7 @@ import eventEmitter from 'events'; import type { BaseChatModel } from '@langchain/core/language_models/chat_models'; import type { Embeddings } from '@langchain/core/embeddings'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const writingAssistantPrompt = ` You are Perplexica, an AI model who is expert at searching the web and answering user's queries. You are currently set on focus mode 'Writing Assistant', this means you will be helping the user write a response to a given query. @@ -19,7 +20,7 @@ Since you are a writing assistant, you would not perform web searches. 
If you th const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/agents/youtubeSearchAgent.ts b/src/agents/youtubeSearchAgent.ts index 4133157..e9b6553 100644 --- a/src/agents/youtubeSearchAgent.ts +++ b/src/agents/youtubeSearchAgent.ts @@ -19,6 +19,7 @@ import formatChatHistoryAsString from '../utils/formatHistory'; import eventEmitter from 'events'; import computeSimilarity from '../utils/computeSimilarity'; import logger from '../utils/logger'; +import { IterableReadableStream } from '@langchain/core/utils/stream'; const basicYoutubeSearchRetrieverPrompt = ` You will be given a conversation below and a follow up question. You need to rephrase the follow-up question if needed so it is a standalone question that can be used by the LLM to search the web for information. @@ -66,7 +67,7 @@ const basicYoutubeSearchResponsePrompt = ` const strParser = new StringOutputParser(); const handleStream = async ( - stream: AsyncGenerator, + stream: IterableReadableStream, emitter: eventEmitter, ) => { for await (const event of stream) { diff --git a/src/app.ts b/src/app.ts index b8c2371..96b3a0c 100644 --- a/src/app.ts +++ b/src/app.ts @@ -28,3 +28,11 @@ server.listen(port, () => { }); startWebSocketServer(server); + +process.on('uncaughtException', (err, origin) => { + logger.error(`Uncaught Exception at ${origin}: ${err}`); +}); + +process.on('unhandledRejection', (reason, promise) => { + logger.error(`Unhandled Rejection at: ${promise}, reason: ${reason}`); +}); diff --git a/src/config.ts b/src/config.ts index 72763ee..9a537d9 100644 --- a/src/config.ts +++ b/src/config.ts @@ -41,10 +41,14 @@ export const getGroqApiKey = () => loadConfig().API_KEYS.GROQ; export const getAnthropicApiKey = () => loadConfig().API_KEYS.ANTHROPIC; + export const getGeminiApiKey = () => loadConfig().API_KEYS.Gemini; export const getSearxngApiEndpoint = () => loadConfig().API_ENDPOINTS.SEARXNG; +export const getSearxngApiEndpoint = () => + process.env.SEARXNG_API_URL || loadConfig().API_ENDPOINTS.SEARXNG; + export const getOllamaApiEndpoint = () => loadConfig().API_ENDPOINTS.OLLAMA; export const updateConfig = (config: RecursivePartial) => { diff --git a/src/lib/linkDocument.ts b/src/lib/linkDocument.ts new file mode 100644 index 0000000..5e90571 --- /dev/null +++ b/src/lib/linkDocument.ts @@ -0,0 +1,99 @@ +import axios from 'axios'; +import { htmlToText } from 'html-to-text'; +import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; +import { Document } from '@langchain/core/documents'; +import pdfParse from 'pdf-parse'; +import logger from '../utils/logger'; + +export const getDocumentsFromLinks = async ({ links }: { links: string[] }) => { + const splitter = new RecursiveCharacterTextSplitter(); + + let docs: Document[] = []; + + await Promise.all( + links.map(async (link) => { + link = + link.startsWith('http://') || link.startsWith('https://') + ? 
link + : `https://${link}`; + + try { + const res = await axios.get(link, { + responseType: 'arraybuffer', + }); + + const isPdf = res.headers['content-type'] === 'application/pdf'; + + if (isPdf) { + const pdfText = await pdfParse(res.data); + const parsedText = pdfText.text + .replace(/(\r\n|\n|\r)/gm, ' ') + .replace(/\s+/g, ' ') + .trim(); + + const splittedText = await splitter.splitText(parsedText); + const title = 'PDF Document'; + + const linkDocs = splittedText.map((text) => { + return new Document({ + pageContent: text, + metadata: { + title: title, + url: link, + }, + }); + }); + + docs.push(...linkDocs); + return; + } + + const parsedText = htmlToText(res.data.toString('utf8'), { + selectors: [ + { + selector: 'a', + options: { + ignoreHref: true, + }, + }, + ], + }) + .replace(/(\r\n|\n|\r)/gm, ' ') + .replace(/\s+/g, ' ') + .trim(); + + const splittedText = await splitter.splitText(parsedText); + const title = res.data + .toString('utf8') + .match(/(.*?)<\/title>/)?.[1]; + + const linkDocs = splittedText.map((text) => { + return new Document({ + pageContent: text, + metadata: { + title: title || link, + url: link, + }, + }); + }); + + docs.push(...linkDocs); + } catch (err) { + logger.error( + `Error at generating documents from links: ${err.message}`, + ); + docs.push( + new Document({ + pageContent: `Failed to retrieve content from the link: ${err.message}`, + metadata: { + title: 'Failed to retrieve content', + url: link, + }, + }), + ); + } + }), + ); + + return docs; +}; diff --git a/src/lib/outputParsers/lineOutputParser.ts b/src/lib/outputParsers/lineOutputParser.ts new file mode 100644 index 0000000..b50a20e --- /dev/null +++ b/src/lib/outputParsers/lineOutputParser.ts @@ -0,0 +1,46 @@ +import { BaseOutputParser } from '@langchain/core/output_parsers'; + +interface LineOutputParserArgs { + key?: string; +} + +class LineOutputParser extends BaseOutputParser<string> { + private key = 'questions'; + + constructor(args?: LineOutputParserArgs) { + super(); + this.key = args.key ?? this.key; + } + + static lc_name() { + return 'LineOutputParser'; + } + + lc_namespace = ['langchain', 'output_parsers', 'line_output_parser']; + + async parse(text: string): Promise<string> { + const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/; + const startKeyIndex = text.indexOf(`<${this.key}>`); + const endKeyIndex = text.indexOf(`</${this.key}>`); + + if (startKeyIndex === -1 || endKeyIndex === -1) { + return ''; + } + + const questionsStartIndex = + startKeyIndex === -1 ? 0 : startKeyIndex + `<${this.key}>`.length; + const questionsEndIndex = endKeyIndex === -1 ? text.length : endKeyIndex; + const line = text + .slice(questionsStartIndex, questionsEndIndex) + .trim() + .replace(regex, ''); + + return line; + } + + getFormatInstructions(): string { + throw new Error('Not implemented.'); + } +} + +export default LineOutputParser; diff --git a/src/lib/outputParsers/listLineOutputParser.ts b/src/lib/outputParsers/listLineOutputParser.ts index 57a9bbc..1e2f0f5 100644 --- a/src/lib/outputParsers/listLineOutputParser.ts +++ b/src/lib/outputParsers/listLineOutputParser.ts @@ -22,6 +22,11 @@ class LineListOutputParser extends BaseOutputParser<string[]> { const regex = /^(\s*(-|\*|\d+\.\s|\d+\)\s|\u2022)\s*)+/; const startKeyIndex = text.indexOf(`<${this.key}>`); const endKeyIndex = text.indexOf(`</${this.key}>`); + + if (startKeyIndex === -1 || endKeyIndex === -1) { + return []; + } + const questionsStartIndex = startKeyIndex === -1 ? 
0 : startKeyIndex + `<${this.key}>`.length; const questionsEndIndex = endKeyIndex === -1 ? text.length : endKeyIndex; diff --git a/ui/app/layout.tsx b/ui/app/layout.tsx index 2edbf94..684a99c 100644 --- a/ui/app/layout.tsx +++ b/ui/app/layout.tsx @@ -34,7 +34,7 @@ export default function RootLayout({ unstyled: true, classNames: { toast: - 'bg-light-primary dark:bg-dark-primary text-white rounded-lg p-4 flex flex-row items-center space-x-2', + 'bg-light-primary dark:bg-dark-secondary dark:text-white/70 text-black-70 rounded-lg p-4 flex flex-row items-center space-x-2', }, }} /> diff --git a/ui/components/ChatWindow.tsx b/ui/components/ChatWindow.tsx index f2c89a3..b3d0089 100644 --- a/ui/components/ChatWindow.tsx +++ b/ui/components/ChatWindow.tsx @@ -38,43 +38,56 @@ const useSocket = ( 'embeddingModelProvider', ); + const providers = await fetch( + `${process.env.NEXT_PUBLIC_API_URL}/models`, + { + headers: { + 'Content-Type': 'application/json', + }, + }, + ).then(async (res) => await res.json()); + if ( !chatModel || !chatModelProvider || !embeddingModel || !embeddingModelProvider ) { - const providers = await fetch( - `${process.env.NEXT_PUBLIC_API_URL}/models`, - { - headers: { - 'Content-Type': 'application/json', - }, - }, - ).then(async (res) => await res.json()); + if (!chatModel || !chatModelProvider) { + const chatModelProviders = providers.chatModelProviders; - const chatModelProviders = providers.chatModelProviders; - const embeddingModelProviders = providers.embeddingModelProviders; + chatModelProvider = Object.keys(chatModelProviders)[0]; - if ( - !chatModelProviders || - Object.keys(chatModelProviders).length === 0 - ) - return toast.error('No chat models available'); + if (chatModelProvider === 'custom_openai') { + toast.error( + 'Seems like you are using the custom OpenAI provider, please open the settings and configure the API key and base URL', + ); + setError(true); + return; + } else { + chatModel = Object.keys(chatModelProviders[chatModelProvider])[0]; + if ( + !chatModelProviders || + Object.keys(chatModelProviders).length === 0 + ) + return toast.error('No chat models available'); + } + } - if ( - !embeddingModelProviders || - Object.keys(embeddingModelProviders).length === 0 - ) - return toast.error('No embedding models available'); + if (!embeddingModel || !embeddingModelProvider) { + const embeddingModelProviders = providers.embeddingModelProviders; - chatModelProvider = Object.keys(chatModelProviders)[0]; - chatModel = Object.keys(chatModelProviders[chatModelProvider])[0]; + if ( + !embeddingModelProviders || + Object.keys(embeddingModelProviders).length === 0 + ) + return toast.error('No embedding models available'); - embeddingModelProvider = Object.keys(embeddingModelProviders)[0]; - embeddingModel = Object.keys( - embeddingModelProviders[embeddingModelProvider], - )[0]; + embeddingModelProvider = Object.keys(embeddingModelProviders)[0]; + embeddingModel = Object.keys( + embeddingModelProviders[embeddingModelProvider], + )[0]; + } localStorage.setItem('chatModel', chatModel!); localStorage.setItem('chatModelProvider', chatModelProvider); @@ -84,15 +97,6 @@ const useSocket = ( embeddingModelProvider, ); } else { - const providers = await fetch( - `${process.env.NEXT_PUBLIC_API_URL}/models`, - { - headers: { - 'Content-Type': 'application/json', - }, - }, - ).then(async (res) => await res.json()); - const chatModelProviders = providers.chatModelProviders; const embeddingModelProviders = providers.embeddingModelProviders; @@ -106,6 +110,7 @@ const useSocket = ( 
if ( chatModelProvider && + chatModelProvider != 'custom_openai' && !chatModelProviders[chatModelProvider][chatModel] ) { chatModel = Object.keys(chatModelProviders[chatModelProvider])[0]; @@ -160,8 +165,6 @@ const useSocket = ( const timeoutId = setTimeout(() => { if (ws.readyState !== 1) { - ws.close(); - setError(true); toast.error( 'Failed to connect to the server. Please try again later.', ); @@ -171,7 +174,6 @@ const useSocket = ( ws.onopen = () => { console.log('[DEBUG] open'); clearTimeout(timeoutId); - setError(false); setIsWSReady(true); }; @@ -187,18 +189,18 @@ const useSocket = ( console.log('[DEBUG] closed'); }; + ws.addEventListener('message', (e) => { + const data = JSON.parse(e.data); + if (data.type === 'error') { + toast.error(data.data); + } + }); + setWs(ws); }; connectWs(); } - - return () => { - if (ws?.readyState === 1) { - ws?.close(); - console.log('[DEBUG] closed'); - } - }; }, [ws, url, setIsWSReady, setError]); return ws; @@ -304,6 +306,15 @@ const ChatWindow = ({ id }: { id?: string }) => { // eslint-disable-next-line react-hooks/exhaustive-deps }, []); + useEffect(() => { + return () => { + if (ws?.readyState === 1) { + ws.close(); + console.log('[DEBUG] closed'); + } + }; + }, []); + const messagesRef = useRef<Message[]>([]); useEffect(() => { diff --git a/ui/components/EmptyChatMessageInput.tsx b/ui/components/EmptyChatMessageInput.tsx index 0ff9b2e..39d3f16 100644 --- a/ui/components/EmptyChatMessageInput.tsx +++ b/ui/components/EmptyChatMessageInput.tsx @@ -18,14 +18,21 @@ const EmptyChatMessageInput = ({ const inputRef = useRef<HTMLTextAreaElement | null>(null); - const handleKeyDown = (e: KeyboardEvent) => { - if (e.key === '/') { - e.preventDefault(); - inputRef.current?.focus(); - } - }; - useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + const activeElement = document.activeElement; + + const isInputFocused = + activeElement?.tagName === 'INPUT' || + activeElement?.tagName === 'TEXTAREA' || + activeElement?.hasAttribute('contenteditable'); + + if (e.key === '/' && !isInputFocused) { + e.preventDefault(); + inputRef.current?.focus(); + } + }; + document.addEventListener('keydown', handleKeyDown); return () => { diff --git a/ui/components/MessageInput.tsx b/ui/components/MessageInput.tsx index 2229cdf..05d44a6 100644 --- a/ui/components/MessageInput.tsx +++ b/ui/components/MessageInput.tsx @@ -27,14 +27,21 @@ const MessageInput = ({ const inputRef = useRef<HTMLTextAreaElement | null>(null); - const handleKeyDown = (e: KeyboardEvent) => { - if (e.key === '/') { - e.preventDefault(); - inputRef.current?.focus(); - } - }; - useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + const activeElement = document.activeElement; + + const isInputFocused = + activeElement?.tagName === 'INPUT' || + activeElement?.tagName === 'TEXTAREA' || + activeElement?.hasAttribute('contenteditable'); + + if (e.key === '/' && !isInputFocused) { + e.preventDefault(); + inputRef.current?.focus(); + } + }; + document.addEventListener('keydown', handleKeyDown); return () => { diff --git a/ui/components/SettingsDialog.tsx b/ui/components/SettingsDialog.tsx index 174f996..f86d9de 100644 --- a/ui/components/SettingsDialog.tsx +++ b/ui/components/SettingsDialog.tsx @@ -226,9 +226,13 @@ const SettingsDialog = ({ value={selectedChatModelProvider ?? 
undefined} onChange={(e) => { setSelectedChatModelProvider(e.target.value); - setSelectedChatModel( - config.chatModelProviders[e.target.value][0], - ); + if (e.target.value === 'custom_openai') { + setSelectedChatModel(''); + } else { + setSelectedChatModel( + config.chatModelProviders[e.target.value][0], + ); + } }} options={Object.keys(config.chatModelProviders).map( (provider) => ({ diff --git a/ui/package.json b/ui/package.json index 6d265c6..23afda2 100644 --- a/ui/package.json +++ b/ui/package.json @@ -1,6 +1,6 @@ { "name": "perplexica-frontend", - "version": "1.8.0", + "version": "1.9.0-rc2", "license": "MIT", "author": "ItzCrazyKns", "scripts": { diff --git a/yarn.lock b/yarn.lock index 1a25c1c..1c34e1d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -509,6 +509,14 @@ resolved "https://registry.yarnpkg.com/@protobufjs/utf8/-/utf8-1.1.0.tgz#a777360b5b39a1a2e5106f8e858f2fd2d060c570" integrity sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw== +"@selderee/plugin-htmlparser2@^0.11.0": + version "0.11.0" + resolved "https://registry.yarnpkg.com/@selderee/plugin-htmlparser2/-/plugin-htmlparser2-0.11.0.tgz#d5b5e29a7ba6d3958a1972c7be16f4b2c188c517" + integrity sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ== + dependencies: + domhandler "^5.0.3" + selderee "^0.11.0" + "@tsconfig/node10@^1.0.7": version "1.0.11" resolved "https://registry.yarnpkg.com/@tsconfig/node10/-/node10-1.0.11.tgz#6ee46400685f130e278128c7b38b7e031ff5b2f2" @@ -578,6 +586,11 @@ "@types/qs" "*" "@types/serve-static" "*" +"@types/html-to-text@^9.0.4": + version "9.0.4" + resolved "https://registry.yarnpkg.com/@types/html-to-text/-/html-to-text-9.0.4.tgz#4a83dd8ae8bfa91457d0b1ffc26f4d0537eff58c" + integrity sha512-pUY3cKH/Nm2yYrEmDlPR1mR7yszjGx4DrwPjQ702C4/D5CwHuZTgZdIdwPkRbcuhs7BAh2L5rg3CL5cbRiGTCQ== + "@types/http-errors@*": version "2.0.4" resolved "https://registry.yarnpkg.com/@types/http-errors/-/http-errors-2.0.4.tgz#7eb47726c391b7345a6ec35ad7f4de469cf5ba4f" @@ -622,6 +635,11 @@ dependencies: undici-types "~5.26.4" +"@types/pdf-parse@^1.1.4": + version "1.1.4" + resolved "https://registry.yarnpkg.com/@types/pdf-parse/-/pdf-parse-1.1.4.tgz#21a539efd2f16009d08aeed3350133948b5d7ed1" + integrity sha512-+gbBHbNCVGGYw1S9lAIIvrHW47UYOhMIFUsJcMkMrzy1Jf0vulBN3XQIjPgnoOXveMuHnF3b57fXROnY/Or7eg== + "@types/qs@*": version "6.9.14" resolved "https://registry.yarnpkg.com/@types/qs/-/qs-6.9.14.tgz#169e142bfe493895287bee382af6039795e9b75b" @@ -1092,6 +1110,13 @@ debug@2.6.9: dependencies: ms "2.0.0" +debug@^3.1.0: + version "3.2.7" + resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a" + integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ== + dependencies: + ms "^2.1.1" + debug@^4: version "4.3.4" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" @@ -1123,6 +1148,11 @@ deep-extend@^0.6.0: resolved "https://registry.yarnpkg.com/deep-extend/-/deep-extend-0.6.0.tgz#c4fa7c95404a17a9c3e8ca7e1537312b736330ac" integrity sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA== +deepmerge@^4.3.1: + version "4.3.1" + resolved "https://registry.yarnpkg.com/deepmerge/-/deepmerge-4.3.1.tgz#44b5f2147cd3b00d4b56137685966f26fd25dd4a" + integrity sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A== + 
define-data-property@^1.1.4: version "1.1.4" resolved "https://registry.yarnpkg.com/define-data-property/-/define-data-property-1.1.4.tgz#894dc141bb7d3060ae4366f6a0107e68fbe48c5e" @@ -1165,6 +1195,36 @@ digest-fetch@^1.3.0: base-64 "^0.1.0" md5 "^2.3.0" +dom-serializer@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-2.0.0.tgz#e41b802e1eedf9f6cae183ce5e622d789d7d8e53" + integrity sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg== + dependencies: + domelementtype "^2.3.0" + domhandler "^5.0.2" + entities "^4.2.0" + +domelementtype@^2.3.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.3.0.tgz#5c45e8e869952626331d7aab326d01daf65d589d" + integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw== + +domhandler@^5.0.2, domhandler@^5.0.3: + version "5.0.3" + resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-5.0.3.tgz#cc385f7f751f1d1fc650c21374804254538c7d31" + integrity sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w== + dependencies: + domelementtype "^2.3.0" + +domutils@^3.0.1: + version "3.1.0" + resolved "https://registry.yarnpkg.com/domutils/-/domutils-3.1.0.tgz#c47f551278d3dc4b0b1ab8cbb42d751a6f0d824e" + integrity sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA== + dependencies: + dom-serializer "^2.0.0" + domelementtype "^2.3.0" + domhandler "^5.0.3" + dotenv@^16.4.5: version "16.4.5" resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-16.4.5.tgz#cdd3b3b604cb327e286b4762e13502f717cb099f" @@ -1206,6 +1266,11 @@ end-of-stream@^1.1.0, end-of-stream@^1.4.1: dependencies: once "^1.4.0" +entities@^4.2.0, entities@^4.4.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48" + integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw== + es-define-property@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/es-define-property/-/es-define-property-1.0.0.tgz#c7faefbdff8b2696cf5f46921edfb77cc4ba3845" @@ -1529,6 +1594,27 @@ hasown@^2.0.0: dependencies: function-bind "^1.1.2" +html-to-text@^9.0.5: + version "9.0.5" + resolved "https://registry.yarnpkg.com/html-to-text/-/html-to-text-9.0.5.tgz#6149a0f618ae7a0db8085dca9bbf96d32bb8368d" + integrity sha512-qY60FjREgVZL03vJU6IfMV4GDjGBIoOyvuFdpBDIX9yTlDw0TjxVBQp+P8NvpdIXNJvfWBTNul7fsAQJq2FNpg== + dependencies: + "@selderee/plugin-htmlparser2" "^0.11.0" + deepmerge "^4.3.1" + dom-serializer "^2.0.0" + htmlparser2 "^8.0.2" + selderee "^0.11.0" + +htmlparser2@^8.0.2: + version "8.0.2" + resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.2.tgz#f002151705b383e62433b5cf466f5b716edaec21" + integrity sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA== + dependencies: + domelementtype "^2.3.0" + domhandler "^5.0.3" + domutils "^3.0.1" + entities "^4.4.0" + http-errors@2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-2.0.0.tgz#b7774a1486ef73cf7667ac9ae0858c012c57b9d3" @@ -1727,6 +1813,11 @@ langsmith@~0.1.30: p-retry "4" uuid "^9.0.0" +leac@^0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/leac/-/leac-0.6.0.tgz#dcf136e382e666bd2475f44a1096061b70dc0912" + integrity 
sha512-y+SqErxb8h7nE/fiEX07jsbuhrpO9lL8eca7/Y1nuWV2moNlXhyd59iDGcRf6moVyDMbmTNzL40SUyrFU/yDpg== + lodash.set@^4.3.2: version "4.3.2" resolved "https://registry.yarnpkg.com/lodash.set/-/lodash.set-4.3.2.tgz#d8757b1da807dde24816b0d6a84bea1a76230b23" @@ -1907,6 +1998,11 @@ node-domexception@1.0.0: resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5" integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ== +node-ensure@^0.0.0: + version "0.0.0" + resolved "https://registry.yarnpkg.com/node-ensure/-/node-ensure-0.0.0.tgz#ecae764150de99861ec5c810fd5d096b183932a7" + integrity sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw== + node-fetch@^2.6.7: version "2.7.0" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d" @@ -2071,6 +2167,14 @@ p-timeout@^3.2.0: dependencies: p-finally "^1.0.0" +parseley@^0.12.0: + version "0.12.1" + resolved "https://registry.yarnpkg.com/parseley/-/parseley-0.12.1.tgz#4afd561d50215ebe259e3e7a853e62f600683aef" + integrity sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw== + dependencies: + leac "^0.6.0" + peberminta "^0.9.0" + parseurl@~1.3.3: version "1.3.3" resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.3.tgz#9da19e7bee8d12dff0513ed5b76957793bc2e8d4" @@ -2081,6 +2185,19 @@ path-to-regexp@0.1.7: resolved "https://registry.yarnpkg.com/path-to-regexp/-/path-to-regexp-0.1.7.tgz#df604178005f522f15eb4490e7247a1bfaa67f8c" integrity sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ== +pdf-parse@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/pdf-parse/-/pdf-parse-1.1.1.tgz#745e07408679548b3995ff896fd38e96e19d14a7" + integrity sha512-v6ZJ/efsBpGrGGknjtq9J/oC8tZWq0KWL5vQrk2GlzLEQPUDB1ex+13Rmidl1neNN358Jn9EHZw5y07FFtaC7A== + dependencies: + debug "^3.1.0" + node-ensure "^0.0.0" + +peberminta@^0.9.0: + version "0.9.0" + resolved "https://registry.yarnpkg.com/peberminta/-/peberminta-0.9.0.tgz#8ec9bc0eb84b7d368126e71ce9033501dca2a352" + integrity sha512-XIxfHpEuSJbITd1H3EeQwpcZbTLHc+VVr8ANI9t5sit565tsI4/xK3KWTUFE2e6QiangUkh3B0jihzmGnNrRsQ== + picomatch@^2.0.4, picomatch@^2.2.1: version "2.3.1" resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" @@ -2242,6 +2359,13 @@ safe-stable-stringify@^2.3.1: resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== +selderee@^0.11.0: + version "0.11.0" + resolved "https://registry.yarnpkg.com/selderee/-/selderee-0.11.0.tgz#6af0c7983e073ad3e35787ffe20cefd9daf0ec8a" + integrity sha512-5TF+l7p4+OsnP8BCCvSyZiSPc4x4//p5uPwK8TCnVPJYRmU2aYKMpOXvw8zM5a5JvuuCGN1jmsMwuU2W02ukfA== + dependencies: + parseley "^0.12.0" + semver@^7.3.5, semver@^7.5.3, semver@^7.5.4: version "7.6.0" resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.0.tgz#1a46a4db4bffcccd97b743b5005c8325f23d4e2d"