# --- readthedocs/domaindata/admin.py ---
class DomainDataAdmin(admin.ModelAdmin):

    """Django admin configuration for ``DomainData`` rows."""

    # Filters offered in the admin change list.
    list_filter = ('type', 'project')
    # Edit these foreign keys as raw IDs instead of select widgets.
    raw_id_fields = ('project', 'version')
    # Columns the admin search box matches against.
    search_fields = ('doc_name', 'name')


admin.site.register(DomainData, DomainDataAdmin)


# --- readthedocs/domaindata/api.py ---
class DomainDataSerializer(serializers.ModelSerializer):

    """
    Serializer for ``DomainData``.

    ``project`` and ``version`` are rendered as their slugs and are read-only.
    ``doc_type`` and ``doc_url`` come from the model's properties of the same
    name.
    """

    project = serializers.SlugRelatedField(slug_field='slug', read_only=True)
    version = serializers.SlugRelatedField(slug_field='slug', read_only=True)

    class Meta:
        model = DomainData
        fields = ('project', 'version', 'name', 'display_name', 'doc_type', 'doc_url')


class DomainDataAdminSerializer(DomainDataSerializer):

    """Variant of ``DomainDataSerializer`` that exposes every model field."""

    class Meta(DomainDataSerializer.Meta):
        fields = '__all__'


class DomainDataAPIView(UserSelectViewSet):

    """
    API endpoint for ``DomainData``.

    NOTE(review): presumably ``UserSelectViewSet`` switches to
    ``admin_serializer_class`` for privileged users -- confirm against
    ``readthedocs.restapi.views.model_views``.
    """

    model = DomainData
    serializer_class = DomainDataSerializer
    admin_serializer_class = DomainDataAdminSerializer
    # Query-string filters accepted by the endpoint.
    filter_fields = ('project__slug', 'version__slug', 'domain', 'type', 'doc_name', 'name')
@python_2_unicode_compatible
class DomainData(models.Model):

    """
    Information from a project about its Sphinx domains.

    This captures data about API objects that exist in that codebase.
    Each row is one named object of a given ``domain``/``type`` together with
    the document (``doc_name``) and ``anchor`` it lives at, scoped to a
    project and version.
    """

    project = models.ForeignKey(
        Project,
        related_name='domain_data',
    )
    version = models.ForeignKey(
        Version,
        verbose_name=_('Version'),
        related_name='domain_data',
    )
    # ``auto_now`` refreshes this timestamp on every save.
    # NOTE(review): the label says "Publication date" although the field is a
    # modification timestamp -- left untouched to avoid a migration; confirm.
    modified_date = models.DateTimeField(_('Publication date'), auto_now=True)
    commit = models.CharField(_('Commit'), max_length=255)

    domain = models.CharField(
        _('Domain'),
        max_length=255,
    )
    name = models.CharField(
        _('Name'),
        max_length=255,
    )
    display_name = models.CharField(
        _('Display Name'),
        max_length=255,
    )
    type = models.CharField(
        _('Type'),
        max_length=255,
    )
    doc_name = models.CharField(
        _('Doc Name'),
        max_length=255,
    )
    anchor = models.CharField(
        _('Anchor'),
        max_length=255,
    )
    objects = RelatedProjectQuerySet.as_manager()

    def __str__(self):
        # Build a single-line representation: a triple-quoted f-string here
        # would embed newlines and indentation in admin listings and logs.
        return (
            f'DomainData [{self.project.slug}:{self.version.slug}] '
            f'[{self.domain}:{self.type}] '
            f'{self.name} -> {self.doc_name}#{self.anchor}'
        )

    @property
    def doc_type(self):
        """Return the combined ``domain:type`` identifier of this object."""
        return f'{self.domain}:{self.type}'

    @property
    def doc_url(self):
        """
        Return the resolved URL of this object.

        The path is ``doc_name`` with ``#anchor`` appended when an anchor is
        set, resolved for this row's project and version slug.
        """
        path = self.doc_name
        if self.anchor:
            path += f'#{self.anchor}'
        full_url = resolve(
            project=self.project,
            version_slug=self.version.slug,
            filename=path,
        )
        return full_url
def _update_intersphinx_data(version, path, commit):
    """
    Update intersphinx data for this version.

    Loads the Sphinx inventory ``objects.inv`` found in ``path``, makes sure a
    ``DomainData`` row exists for every inventory entry and stamps it with
    ``commit``, then deletes the rows of this version that carry a different
    commit (i.e. objects that are no longer in the inventory).

    :param version: Version instance
    :param path: Path to search
    :param commit: Commit that updated path
    """
    object_file = os.path.join(path, 'objects.inv')

    # ``fetch_inventory`` only needs these attributes from a Sphinx app.
    class MockConfig:
        intersphinx_timeout = None  # type: int
        tls_verify = False

    class MockApp:
        srcdir = ''
        config = MockConfig()

        def warn(self, msg):
            # type: (str) -> None
            # Route Sphinx warnings through the task logger, not stderr.
            log.warning('Sphinx MockApp: %s', msg)

    if os.path.exists(object_file):
        invdata = intersphinx.fetch_inventory(MockApp(), '', object_file)
    else:
        # No inventory was built (e.g. not a Sphinx HTML build). Treat it as
        # empty so stale rows are still purged below instead of asking Sphinx
        # to open a file that is not there.
        log.debug('No objects.inv found at %s, nothing to index.', object_file)
        invdata = None
    invdata = invdata or {}

    for key in sorted(invdata):
        # Inventory keys look like ``domain:type`` (e.g. ``py:class``).
        domain, _type = key.split(':', 1)
        for name, einfo in sorted(invdata[key].items()):
            # ``einfo[2]`` is the target URL and ``einfo[3]`` the display name.
            url = einfo[2]
            # Split on the first ``#`` only: the anchor may itself contain
            # ``#``, which made a plain ``split('#')`` unpack fail.
            doc_name, _sep, anchor = url.partition('#')
            display_name = einfo[3]
            obj, _created = DomainData.objects.get_or_create(
                project=version.project,
                version=version,
                domain=domain,
                name=name,
                display_name=display_name,
                type=_type,
                doc_name=doc_name,
                anchor=anchor,
            )
            if obj.commit != commit:
                obj.commit = commit
                obj.save()

    # Anything not touched above belongs to an older build of this version.
    DomainData.objects.filter(
        project=version.project,
        version=version,
    ).exclude(commit=commit).delete()
@domain_index.doc_type
class DomainDocument(DocType):

    """Elasticsearch document built from ``DomainData`` rows."""

    project = fields.KeywordField(attr='project.slug')
    version = fields.KeywordField(attr='version.slug')
    # ``doc_type`` is the model property combining domain and type.
    doc_type = fields.KeywordField(attr='doc_type')
    anchor = fields.KeywordField(attr='anchor')

    class Meta(object):
        model = DomainData
        fields = ('name', 'display_name', 'doc_name')
        ignore_signals = True

    @classmethod
    def faceted_search(cls, query, user, doc_type=None):
        """
        Return a ``DomainSearch`` for ``query`` on behalf of ``user``.

        :param query: search string
        :param user: user performing the search
        :param doc_type: optional ``domain:type`` value to filter on
        """
        # Imported here rather than at module level: ``faceted_search``
        # imports this module.
        from readthedocs.search.faceted_search import DomainSearch
        kwargs = {
            'user': user,
            'query': query,
        }

        if doc_type:
            kwargs['filters'] = {'doc_type': doc_type}

        return DomainSearch(**kwargs)

    def get_queryset(self):
        """Overwrite default queryset to leave out object types we don't index."""
        queryset = super().get_queryset()

        # ``DomainData`` stores domain and type in separate columns, so a
        # combined value such as ``std:doc`` never equals ``type``; the
        # exclusion has to name both columns to have any effect.
        excluded_types = [
            {'domain': 'std', 'type': 'doc'},
            {'domain': 'std', 'type': 'label'},
        ]

        for exclude in excluded_types:
            queryset = queryset.exclude(**exclude)
        return queryset
class RTDFacetedSearch(FacetedSearch):

    """Faceted search base shared by the search classes in this module."""

    def __init__(self, user, **kwargs):
        """
        Pass in a user in order to filter search results by privacy.

        The ``self.user`` attribute isn't currently used on the .org, but is
        used on the .com.

        Keyword arguments named after one of this class's facets are moved
        into ``filters``; keyword arguments named after any other known facet
        are dropped before the parent constructor is called.
        """
        self.user = user
        self.filter_by_user = kwargs.pop('filter_by_user', None)

        # Turn facet keyword arguments into filters.
        for name in self.facets:
            if name not in kwargs:
                continue
            filters = kwargs.setdefault('filters', {})
            filters[name] = kwargs.pop(name)

        # Facets this class doesn't define must not reach the parent.
        for name in ALL_FACETS:
            kwargs.pop(name, None)

        super().__init__(**kwargs)

    def search(self):
        """
        Build the base search object.

        ``content`` and ``headers`` are excluded from the returned source and
        the result set is limited to the first 25 hits.
        """
        base = super().search()
        trimmed = base.source(exclude=['content', 'headers'])
        return trimmed[:25]

    def query(self, search, query):
        """
        Add query part to ``search`` when needed.

        Also:

        * Adds SimpleQueryString instead of default query.
        * Adds HTML encoding of results to avoid XSS issues.
        """
        search = search.highlight_options(encoder='html', number_of_fragments=3)

        # One clause per operator: a document satisfying 'and' also satisfies
        # 'or', so it matches both clauses and scores higher.
        clauses = [
            SimpleQueryString(
                query=query, fields=self.fields, default_operator=operator
            )
            for operator in ('and', 'or')
        ]

        # ``should`` returns results where either clause matches.
        return search.query(Bool(should=clauses))


class DomainSearchBase(RTDFacetedSearch):

    """Faceted search over the ``DomainDocument`` index."""

    facets = {
        'project': TermsFacet(field='project'),
        'version': TermsFacet(field='version'),
        'doc_type': TermsFacet(field='doc_type'),
    }
    doc_types = [DomainDocument]
    index = DomainDocument._doc_type.index
    fields = ('display_name^5', 'name')
class DomainSearch(SettingsOverrideObject):

    """
    Allow this class to be overridden based on CLASS_OVERRIDES setting.

    This is primarily used on the .com to adjust how we filter our search queries
    """

    _default_class = DomainSearchBase


class AllSearch(RTDFacetedSearch):

    """Faceted search across the domain, page and project indexes at once."""

    facets = {
        'project': TermsFacet(field='project'),
        'version': TermsFacet(field='version'),
        'language': TermsFacet(field='language'),
        'doc_type': TermsFacet(field='doc_type'),
        # Facet on the Elasticsearch index each hit came from.
        'index': TermsFacet(field='_index'),
    }
    doc_types = [DomainDocument, PageDocument, ProjectDocument]
    index = [DomainDocument._doc_type.index,
             PageDocument._doc_type.index,
             ProjectDocument._doc_type.index]
    # Searched fields with their boosts, drawn from all three document types.
    fields = ('title^10', 'headers^5', 'content', 'name^20',
              'slug^5', 'description', 'display_name^5')
win.JSON.parse(r) : eval('(' + r + ')') + } catch (err) { + return error(resp, 'Could not parse JSON in response', err) + } + break + case 'js': + resp = eval(r) + break + case 'html': + resp = r + break + case 'xml': + resp = resp.responseXML + && resp.responseXML.parseError // IE trololo + && resp.responseXML.parseError.errorCode + && resp.responseXML.parseError.reason + ? null + : resp.responseXML + break + } + } + + self._responseArgs.resp = resp + self._fulfilled = true + fn(resp) + self._successHandler(resp) + while (self._fulfillmentHandlers.length > 0) { + resp = self._fulfillmentHandlers.shift()(resp) + } + + complete(resp) + } + + function timedOut() { + self._timedOut = true + self.request.abort() + } + + function error(resp, msg, t) { + resp = self.request + self._responseArgs.resp = resp + self._responseArgs.msg = msg + self._responseArgs.t = t + self._erred = true + while (self._errorHandlers.length > 0) { + self._errorHandlers.shift()(resp, msg, t) + } + complete(resp) + } + + this.request = getRequest.call(this, success, error) + } + + Reqwest.prototype = { + abort: function () { + this._aborted = true + this.request.abort() + } + + , retry: function () { + init.call(this, this.o, this.fn) + } + + /** + * Small deviation from the Promises A CommonJs specification + * http://wiki.commonjs.org/wiki/Promises/A + */ + + /** + * `then` will execute upon successful requests + */ + , then: function (success, fail) { + success = success || function () {} + fail = fail || function () {} + if (this._fulfilled) { + this._responseArgs.resp = success(this._responseArgs.resp) + } else if (this._erred) { + fail(this._responseArgs.resp, this._responseArgs.msg, this._responseArgs.t) + } else { + this._fulfillmentHandlers.push(success) + this._errorHandlers.push(fail) + } + return this + } + + /** + * `always` will execute whether the request succeeds or fails + */ + , always: function (fn) { + if (this._fulfilled || this._erred) { + fn(this._responseArgs.resp) + } 
else { + this._completeHandlers.push(fn) + } + return this + } + + /** + * `fail` will execute when the request fails + */ + , fail: function (fn) { + if (this._erred) { + fn(this._responseArgs.resp, this._responseArgs.msg, this._responseArgs.t) + } else { + this._errorHandlers.push(fn) + } + return this + } + , 'catch': function (fn) { + return this.fail(fn) + } + } + + function reqwest(o, fn) { + return new Reqwest(o, fn) + } + + // normalize newline variants according to spec -> CRLF + function normalize(s) { + return s ? s.replace(/\r?\n/g, '\r\n') : '' + } + + function serial(el, cb) { + var n = el.name + , t = el.tagName.toLowerCase() + , optCb = function (o) { + // IE gives value="" even where there is no value attribute + // 'specified' ref: http://www.w3.org/TR/DOM-Level-3-Core/core.html#ID-862529273 + if (o && !o['disabled']) + cb(n, normalize(o['attributes']['value'] && o['attributes']['value']['specified'] ? o['value'] : o['text'])) + } + , ch, ra, val, i + + // don't serialize elements that are disabled or without a name + if (el.disabled || !n) return + + switch (t) { + case 'input': + if (!/reset|button|image|file/i.test(el.type)) { + ch = /checkbox/i.test(el.type) + ra = /radio/i.test(el.type) + val = el.value + // WebKit gives us "" instead of "on" if a checkbox has no value, so correct it here + ;(!(ch || ra) || el.checked) && cb(n, normalize(ch && val === '' ? 'on' : val)) + } + break + case 'textarea': + cb(n, normalize(el.value)) + break + case 'select': + if (el.type.toLowerCase() === 'select-one') { + optCb(el.selectedIndex >= 0 ? el.options[el.selectedIndex] : null) + } else { + for (i = 0; el.length && i < el.length; i++) { + el.options[i].selected && optCb(el.options[i]) + } + } + break + } + } + + // collect up all form elements found from the passed argument elements all + // the way down to child elements; pass a '
' or form fields. + // called with 'this'=callback to use for serial() on each element + function eachFormElement() { + var cb = this + , e, i + , serializeSubtags = function (e, tags) { + var i, j, fa + for (i = 0; i < tags.length; i++) { + fa = e[byTag](tags[i]) + for (j = 0; j < fa.length; j++) serial(fa[j], cb) + } + } + + for (i = 0; i < arguments.length; i++) { + e = arguments[i] + if (/input|select|textarea/i.test(e.tagName)) serial(e, cb) + serializeSubtags(e, [ 'input', 'select', 'textarea' ]) + } + } + + // standard query string style serialization + function serializeQueryString() { + return reqwest.toQueryString(reqwest.serializeArray.apply(null, arguments)) + } + + // { 'name': 'value', ... } style serialization + function serializeHash() { + var hash = {} + eachFormElement.apply(function (name, value) { + if (name in hash) { + hash[name] && !isArray(hash[name]) && (hash[name] = [hash[name]]) + hash[name].push(value) + } else hash[name] = value + }, arguments) + return hash + } + + // [ { name: 'name', value: 'value' }, ... ] style serialization + reqwest.serializeArray = function () { + var arr = [] + eachFormElement.apply(function (name, value) { + arr.push({name: name, value: value}) + }, arguments) + return arr + } + + reqwest.serialize = function () { + if (arguments.length === 0) return '' + var opt, fn + , args = Array.prototype.slice.call(arguments, 0) + + opt = args.pop() + opt && opt.nodeType && args.push(opt) && (opt = null) + opt && (opt = opt.type) + + if (opt == 'map') fn = serializeHash + else if (opt == 'array') fn = reqwest.serializeArray + else fn = serializeQueryString + + return fn.apply(null, args) + } + + reqwest.toQueryString = function (o, trad) { + var prefix, i + , traditional = trad || false + , s = [] + , enc = encodeURIComponent + , add = function (key, value) { + // If value is a function, invoke it and return its value + value = ('function' === typeof value) ? value() : (value == null ? 
'' : value) + s[s.length] = enc(key) + '=' + enc(value) + } + // If an array was passed in, assume that it is an array of form elements. + if (isArray(o)) { + for (i = 0; o && i < o.length; i++) add(o[i]['name'], o[i]['value']) + } else { + // If traditional, encode the "old" way (the way 1.3.2 or older + // did it), otherwise encode params recursively. + for (prefix in o) { + if (o.hasOwnProperty(prefix)) buildParams(prefix, o[prefix], traditional, add) + } + } + + // spaces should be + according to spec + return s.join('&').replace(/%20/g, '+') + } + + function buildParams(prefix, obj, traditional, add) { + var name, i, v + , rbracket = /\[\]$/ + + if (isArray(obj)) { + // Serialize array item. + for (i = 0; obj && i < obj.length; i++) { + v = obj[i] + if (traditional || rbracket.test(prefix)) { + // Treat each array item as a scalar. + add(prefix, v) + } else { + buildParams(prefix + '[' + (typeof v === 'object' ? i : '') + ']', v, traditional, add) + } + } + } else if (obj && obj.toString() === '[object Object]') { + // Serialize object item. + for (name in obj) { + buildParams(prefix + '[' + name + ']', obj[name], traditional, add) + } + + } else { + // Serialize scalar item. 
+ add(prefix, obj) + } + } + + reqwest.getcallbackPrefix = function () { + return callbackPrefix + } + + // jQuery and Zepto compatibility, differences can be remapped here so you can call + // .ajax.compat(options, callback) + reqwest.compat = function (o, fn) { + if (o) { + o['type'] && (o['method'] = o['type']) && delete o['type'] + o['dataType'] && (o['type'] = o['dataType']) + o['jsonpCallback'] && (o['jsonpCallbackName'] = o['jsonpCallback']) && delete o['jsonpCallback'] + o['jsonp'] && (o['jsonpCallback'] = o['jsonp']) + } + return new Reqwest(o, fn) + } + + reqwest.ajaxSetup = function (options) { + options = options || {} + for (var k in options) { + globalSetupOptions[k] = options[k] + } + } + + return reqwest +}); + +},{}],2:[function(require,module,exports){ +// Document response + +// Page +var Page = function (project, version, doc) { + this.project = project; + this.version = version; + this.doc = doc; + + this.url = null; + this.sections = []; +}; + +Page.prototype.section = function (section) { + return new Section(this.project, this.version, this.doc, section); +}; + +// Section +var Section = function (project, version, doc, section) { + this.project = project; + this.version = version; + this.doc = doc; + this.section = section; + + this.url = null; + this.content = null; + this.wrapped = null; +} + +// Add iframe with returned content to page +Section.prototype.insertContent = function (elem) { + var iframe = document.createElement('iframe'), + self = this; + + iframe.style.display = 'none'; + + if (window.jQuery && elem instanceof window.jQuery) { + elem = elem.get(0); + } + + if (typeof(elem) != 'undefined') { + while (elem.children.length > 0) { + elem.firstChild.remove(); + } + elem.appendChild(iframe); + } + + var win = iframe.contentWindow; + + win.document.open(); + win.document.write(this.content); + win.document.close(); + + var head = win.document.head, + body = win.document.body, + base = null; + + if (head) { + base = 
win.document.createElement('base'); + base.target = '_parent'; + base.href = this.url; + head.appendChild(base); + + // Copy linked stylesheets from parent + var link_elems = document.head.getElementsByTagName('link'); + for (var n = 0; n < link_elems.length; n++) { + var link = link_elems[n]; + if (link.rel == 'stylesheet') { + head.appendChild(link.cloneNode()); + } + } + } + + win.onload = function () { + iframe.style.display = 'inline-block'; + }; + + return iframe; +}; + + +exports.Section = Section; +exports.Page = Page; + +},{}],3:[function(require,module,exports){ +/* Read the Docs Embed functions */ + +var doc = require('./doc'), + Section = doc.Section, + Page = doc.Page; + + +var Embed = function (config) { + this._api_host = 'https://api.grokthedocs.com'; + if (typeof config == 'object') { + if ('api_host' in config) { + this._api_host = config['api_host']; + } + } +}; + +Embed.prototype.section = function (project, version, doc, section, + callback, error_callback) { + callback = callback || function () {}; + error_callback = error_callback || function () {}; + + var self = this, + data = { + 'project': project, + 'version': version, + 'doc': doc, + 'section': section + }; + + this._getObject( + data, + function (resp) { + var section_ret = new Section(project, version, doc, section); + section_ret.url = resp.url; + section_ret.content = resp.content; + section_ret.wrapped = resp.wrapped; + callback(section_ret); + }, + function (error, msg) { + error_callback(error); + } + ); +}; + +Embed.prototype.page = function (project, version, doc, callback, + error_callback) { + + var self = this, + data = { + 'project': project, + 'version': version, + 'doc': doc, + }; + + this._getObject( + data, + function (resp) { + var page = new Page(project, version, doc); + page.url = resp.url; + // TODO headers is misleading here, rename it on the API + page.sections = resp.headers; + callback(page); + }, + function (error, msg) { + error_callback(error); + } + ) +}; + 
// Lazily load the embedded section for each search-result fragment the first
// time the user clicks or hovers it.
$(document).ready(function () {
    // Use the host serving this page rather than a hard-coded development
    // server, so the script works outside of local development.
    var config = {
        api_host: window.location.protocol + '//' + window.location.host
    };
    var embed = new Embed(config);

    $('.fragment').one('click.rtdembed mouseover.rtdembed', function (event) {
        var target = event.currentTarget;

        // .one() with several event types fires once *per type*; drop the
        // remaining handler so the section is fetched and inserted only once.
        $(target).off('.rtdembed');

        // A missing attribute yields null; don't call .replace() on it.
        var docName = target.getAttribute('doc_name') || '';

        embed.section(
            target.getAttribute('project'),
            target.getAttribute('version'),
            docName.replace('.html', ''),
            target.getAttribute('anchor'),
            function (section) {
                section.insertContent(target);
            }
        );
    });
});
get_project_list_or_404 from readthedocs.projects.models import Project - +from readthedocs.search.faceted_search import ( + AllSearch, ProjectSearch, PageSearch, DomainSearch, ALL_FACETS +) log = logging.getLogger(__name__) LOG_TEMPLATE = '(Elastic Search) [{user}:{type}] [{project}:{version}:{language}] {msg}' @@ -25,42 +25,71 @@ 'version', 'taxonomy', 'language', + 'doc_type', + 'index', ), ) -def elastic_search(request): - """Use Elasticsearch for global search.""" +def elastic_search(request, project_slug=None): + """ + Global user search on the dashboard + + This is for both the main search and project search. + + :param project_slug: Sent when the view is a project search + """ + _type = None + + if project_slug: + queryset = Project.objects.protected(request.user) + project_obj = get_object_or_404(queryset, slug=project_slug) + _type = request.GET.get('type', 'file') + user_input = UserInput( query=request.GET.get('q'), - type=request.GET.get('type', 'project'), - project=request.GET.get('project'), + type=_type or request.GET.get('type', 'project'), + project=project_slug or request.GET.get('project'), version=request.GET.get('version', LATEST), taxonomy=request.GET.get('taxonomy'), language=request.GET.get('language'), + doc_type=request.GET.get('doc_type'), + index=request.GET.get('index'), ) + results = '' facets = {} if user_input.query: + kwargs = {} + + for avail_facet in ALL_FACETS: + value = getattr(user_input, avail_facet, None) + if value: + kwargs[avail_facet] = value + if user_input.type == 'project': - project_search = ProjectDocument.faceted_search( - query=user_input.query, user=request.user, language=user_input.language + search = ProjectSearch( + query=user_input.query, user=request.user, **kwargs ) - results = project_search.execute() - facets = results.facets + + elif user_input.type == 'domain': + search = DomainSearch( + query=user_input.query, user=request.user, **kwargs + ) + elif user_input.type == 'file': - kwargs = {} - if 
user_input.project: - kwargs['projects_list'] = [user_input.project] - if user_input.version: - kwargs['versions_list'] = [user_input.version] + search = PageSearch( + query=user_input.query, user=request.user, **kwargs + ) - page_search = PageDocument.faceted_search( + elif user_input.type == 'all': + search = AllSearch( query=user_input.query, user=request.user, **kwargs ) - results = page_search.execute() - facets = results.facets + + results = search.execute() + facets = results.facets log.info( LOG_TEMPLATE.format( @@ -73,6 +102,14 @@ def elastic_search(request): ), ) + # Make sure our selected facets are displayed even when they return 0 results + for avail_facet in ALL_FACETS: + value = getattr(user_input, avail_facet) + if not value or avail_facet not in facets: + continue + if value not in [val[0] for val in facets[avail_facet]]: + facets[avail_facet].insert(0, (value, 0, True)) + if results: if user_input.type == 'file': # Change results to turn newlines in highlight into periods @@ -86,52 +123,16 @@ def elastic_search(request): log.debug('Search facets: %s', pformat(results.facets.to_dict())) template_vars = user_input._asdict() - template_vars.update({'results': results, 'facets': facets}) - return render( - request, - 'search/elastic_search.html', - template_vars, - ) - - -def elastic_project_search(request, project_slug): - """Use elastic search to search in a project.""" - queryset = Project.objects.protected(request.user) - project = get_object_or_404(queryset, slug=project_slug) - version_slug = request.GET.get('version', LATEST) - query = request.GET.get('q', None) - results = None - - if query: - kwargs = {} - kwargs['projects_list'] = [project.slug] - kwargs['versions_list'] = version_slug - - page_search = PageDocument.faceted_search( - query=query, user=request.user, **kwargs - ) - results = page_search.execute() + template_vars.update({ + 'results': results, + 'facets': facets, + }) - log.debug('Search results: %s', 
# CORS
# Patterns are anchored: an unanchored ``(.*)localhost(.*)`` accepts any
# origin that merely contains "localhost" (e.g. https://localhost.evil.com),
# which is dangerous together with CORS_ALLOW_CREDENTIALS.
# Raw strings keep ``\.`` a regex escape instead of an invalid string escape.
# NOTE(review): the localhost entry is a development convenience and would be
# better placed in the development settings -- confirm before shipping.
CORS_ORIGIN_REGEX_WHITELIST = (
    r'^https?://localhost(:\d+)?$',
    r'^http://(.+)\.readthedocs\.io$',
    r'^https://(.+)\.readthedocs\.io$',
)
core_tags i18n %} - -{% block title %}{% blocktrans with query=query|default:"" %}Search: {{ query }} {% endblocktrans %}{% endblock %} - -{% block project_editing %} - {% with search_active="active" %} - {% include "core/project_bar.html" %} - {% endwith %} -{% endblock %} - - -{% block content %} -
-

Search in this project:

- - - - - - {% if query %} - -
-
- -
-

{% blocktrans with query=query|default:"" %}Results for {{ query }}{% endblocktrans %}

-
- -
-
- -
    - {% for result in results %} -
  • -

    - {{ result.project }} - {{ result.title|safe }} -

    - {% for fragment in result.meta.highlight.content|slice:":3" %} -

    - {{ fragment|safe }} -

    - {% endfor %} -
  • - {% empty %} -
  • {% trans "No results found. Bummer." %}
  • - {% endfor %} - -
- -
-
- -
-
- - - {% if page.has_previous or page.has_next %} - - - - {% endif %} - - {% else %} - {# Show some example queries to run, maybe query syntax, something else? #} - {% endif %} - -
-{% endblock %} diff --git a/readthedocs/templates/search/elastic_search.html b/readthedocs/templates/search/elastic_search.html index f46ef4f3a17..dd27642cee5 100644 --- a/readthedocs/templates/search/elastic_search.html +++ b/readthedocs/templates/search/elastic_search.html @@ -1,6 +1,6 @@ {% extends "projects/base_project.html" %} -{% load core_tags i18n %} +{% load core_tags i18n static %} {% block title %}{% blocktrans with query=query|default:"" %}Search: {{ query }} {% endblocktrans %}{% endblock %} @@ -13,16 +13,86 @@ {% endblock %} +{% block extra_scripts %} + + +{% endblock %} + +{% block project_editing %} + {% if project_obj %} + {% with search_active="active" project=project_obj %} + {% include "core/project_bar.html" %} + {% endwith %} + {% endif %} +{% endblock %} + {% block content %}