Skip to content

Commit 076e99f

Browse files
committed
Merge remote-tracking branch 'upstream/main'
2 parents 0c9d222 + 3c6fc25 commit 076e99f

File tree

55 files changed

+540
-201
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+540
-201
lines changed

.devcontainer/devcontainer.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
},
99
"ghcr.io/devcontainers/features/git-lfs:1.1.0": {},
1010
"ghcr.io/devcontainers-contrib/features/poetry:2": {},
11-
"ghcr.io/devcontainers/features/python:1": {}
11+
"ghcr.io/devcontainers/features/python:1": {
12+
"version": "3.12"
13+
}
1214
},
1315
"customizations": {
1416
"vscode": {

docs/content/administration/config-cheat-sheet.en-us.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -832,7 +832,7 @@ Default templates for project boards:
832832
## Issue and pull request attachments (`attachment`)
833833

834834
- `ENABLED`: **true**: Whether issue and pull request attachments are enabled.
835-
- `ALLOWED_TYPES`: **.csv,.docx,.fodg,.fodp,.fods,.fodt,.gif,.gz,.jpeg,.jpg,.log,.md,.mov,.mp4,.odf,.odg,.odp,.ods,.odt,.patch,.pdf,.png,.pptx,.svg,.tgz,.txt,.webm,.xls,.xlsx,.zip**: Comma-separated list of allowed file extensions (`.zip`), mime types (`text/plain`) or wildcard type (`image/*`, `audio/*`, `video/*`). Empty value or `*/*` allows all types.
835+
- `ALLOWED_TYPES`: **.cpuprofile,.csv,.dmp,.docx,.fodg,.fodp,.fods,.fodt,.gif,.gz,.jpeg,.jpg,.json,.jsonc,.log,.md,.mov,.mp4,.odf,.odg,.odp,.ods,.odt,.patch,.pdf,.png,.pptx,.svg,.tgz,.txt,.webm,.xls,.xlsx,.zip**: Comma-separated list of allowed file extensions (`.zip`), mime types (`text/plain`) or wildcard type (`image/*`, `audio/*`, `video/*`). Empty value or `*/*` allows all types.
836836
- `MAX_SIZE`: **2048**: Maximum size (MB).
837837
- `MAX_FILES`: **5**: Maximum number of attachments that can be uploaded at once.
838838
- `STORAGE_TYPE`: **local**: Storage type for attachments, `local` for local disk or `minio` for s3 compatible object storage service, default is `local` or other name defined with `[storage.xxx]`

docs/content/administration/config-cheat-sheet.zh-cn.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -782,7 +782,7 @@ Gitea 创建以下非唯一队列:
782782
## 工单和合并请求的附件 (`attachment`)
783783

784784
- `ENABLED`: **true**: 是否允许用户上传附件。
785-
- `ALLOWED_TYPES`: **.csv,.docx,.fodg,.fodp,.fods,.fodt,.gif,.gz,.jpeg,.jpg,.log,.md,.mov,.mp4,.odf,.odg,.odp,.ods,.odt,.patch,.pdf,.png,.pptx,.svg,.tgz,.txt,.webm,.xls,.xlsx,.zip**: 允许的文件扩展名(`.zip`)、mime 类型(`text/plain`)或通配符类型(`image/*`、`audio/*`、`video/*`)的逗号分隔列表。空值或 `*/*` 允许所有类型。
785+
- `ALLOWED_TYPES`: **.cpuprofile,.csv,.dmp,.docx,.fodg,.fodp,.fods,.fodt,.gif,.gz,.jpeg,.jpg,.json,.jsonc,.log,.md,.mov,.mp4,.odf,.odg,.odp,.ods,.odt,.patch,.pdf,.png,.pptx,.svg,.tgz,.txt,.webm,.xls,.xlsx,.zip**: 允许的文件扩展名(`.zip`)、mime 类型(`text/plain`)或通配符类型(`image/*`、`audio/*`、`video/*`)的逗号分隔列表。空值或 `*/*` 允许所有类型。
786786
- `MAX_SIZE`: **2048**: 附件的最大限制(MB)。
787787
- `MAX_FILES`: **5**: 一次最多上传的附件数量。
788788
- `STORAGE_TYPE`: **local**: 附件的存储类型,`local` 表示本地磁盘,`minio` 表示兼容 S3 的对象存储服务,如果未设置将使用默认值 `local` 或其他在 `[storage.xxx]` 中定义的名称。

modules/indexer/code/bleve/bleve.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -233,21 +233,21 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error {
233233

234234
// Search searches for files in the specified repo.
235235
// Returns the matching file-paths
236-
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
236+
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
237237
var (
238238
indexerQuery query.Query
239239
keywordQuery query.Query
240240
)
241241

242-
if isMatch {
243-
prefixQuery := bleve.NewPrefixQuery(keyword)
244-
prefixQuery.FieldVal = "Content"
245-
keywordQuery = prefixQuery
246-
} else {
242+
if isFuzzy {
247243
phraseQuery := bleve.NewMatchPhraseQuery(keyword)
248244
phraseQuery.FieldVal = "Content"
249245
phraseQuery.Analyzer = repoIndexerAnalyzer
250246
keywordQuery = phraseQuery
247+
} else {
248+
prefixQuery := bleve.NewPrefixQuery(keyword)
249+
prefixQuery.FieldVal = "Content"
250+
keywordQuery = prefixQuery
251251
}
252252

253253
if len(repoIDs) > 0 {

modules/indexer/code/elasticsearch/elasticsearch.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -281,10 +281,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
281281
}
282282

283283
// Search searches for codes and language stats by given conditions.
284-
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
285-
searchType := esMultiMatchTypeBestFields
286-
if isMatch {
287-
searchType = esMultiMatchTypePhrasePrefix
284+
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
285+
searchType := esMultiMatchTypePhrasePrefix
286+
if isFuzzy {
287+
searchType = esMultiMatchTypeBestFields
288288
}
289289

290290
kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType)

modules/indexer/code/git.go

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,9 @@ func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision s
9191
return nil, runErr
9292
}
9393

94+
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
95+
9496
var err error
95-
objectFormat, err := git.GetObjectFormatOfRepo(ctx, repo.RepoPath())
96-
if err != nil {
97-
return nil, err
98-
}
9997
changes.Updates, err = parseGitLsTreeOutput(objectFormat, stdout)
10098
return &changes, err
10199
}
@@ -174,10 +172,8 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio
174172
return nil, err
175173
}
176174

177-
objectFormat, err := git.GetObjectFormatOfRepo(ctx, repo.RepoPath())
178-
if err != nil {
179-
return nil, err
180-
}
175+
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
176+
181177
changes.Updates, err = parseGitLsTreeOutput(objectFormat, lsTreeStdout)
182178
return &changes, err
183179
}

modules/indexer/code/indexer_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
7070

7171
for _, kw := range keywords {
7272
t.Run(kw.Keyword, func(t *testing.T) {
73-
total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, false)
73+
total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true)
7474
assert.NoError(t, err)
7575
assert.Len(t, kw.IDs, int(total))
7676
assert.Len(t, langs, kw.Langs)

modules/indexer/code/internal/indexer.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ type Indexer interface {
1616
internal.Indexer
1717
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
1818
Delete(ctx context.Context, repoID int64) error
19-
Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
19+
Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
2020
}
2121

2222
// NewDummyIndexer returns a dummy indexer
@@ -38,6 +38,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
3838
return fmt.Errorf("indexer is not ready")
3939
}
4040

41-
func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
41+
func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
4242
return 0, nil, nil, fmt.Errorf("indexer is not ready")
4343
}

modules/indexer/code/search.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,13 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
124124
}
125125

126126
// PerformSearch performs a search on a repository
127-
func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
127+
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
128+
func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
128129
if len(keyword) == 0 {
129130
return 0, nil, nil, nil
130131
}
131132

132-
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch)
133+
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy)
133134
if err != nil {
134135
return 0, nil, nil, err
135136
}

modules/indexer/internal/bleve/query.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,13 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQue
2525
return q
2626
}
2727

28+
// PrefixQuery generates a match prefix query for the given prefix and field
29+
func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
30+
q := bleve.NewPrefixQuery(matchPrefix)
31+
q.FieldVal = field
32+
return q
33+
}
34+
2835
// BoolFieldQuery generates a bool field query for the given value and field
2936
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
3037
q := bleve.NewBoolFieldQuery(value)

modules/indexer/issues/bleve/bleve.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,19 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
156156
var queries []query.Query
157157

158158
if options.Keyword != "" {
159-
keywordQueries := []query.Query{
160-
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
161-
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
162-
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
159+
if options.IsFuzzyKeyword {
160+
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
161+
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
162+
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
163+
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
164+
}...))
165+
} else {
166+
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
167+
inner_bleve.PrefixQuery(options.Keyword, "title"),
168+
inner_bleve.PrefixQuery(options.Keyword, "content"),
169+
inner_bleve.PrefixQuery(options.Keyword, "comments"),
170+
}...))
163171
}
164-
queries = append(queries, bleve.NewDisjunctionQuery(keywordQueries...))
165172
}
166173

167174
if len(options.RepoIDs) > 0 || options.AllPublic {

modules/indexer/issues/elasticsearch/elasticsearch.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ import (
1919

2020
const (
2121
issueIndexerLatestVersion = 1
22+
// multi-match-types, currently only 2 types are used
23+
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
24+
esMultiMatchTypeBestFields = "best_fields"
25+
esMultiMatchTypePhrasePrefix = "phrase_prefix"
2226
)
2327

2428
var _ internal.Indexer = &Indexer{}
@@ -141,7 +145,13 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
141145
query := elastic.NewBoolQuery()
142146

143147
if options.Keyword != "" {
144-
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments"))
148+
149+
searchType := esMultiMatchTypePhrasePrefix
150+
if options.IsFuzzyKeyword {
151+
searchType = esMultiMatchTypeBestFields
152+
}
153+
154+
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType))
145155
}
146156

147157
if len(options.RepoIDs) > 0 {

modules/indexer/issues/internal/model.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ type SearchResult struct {
7474
type SearchOptions struct {
7575
Keyword string // keyword to search
7676

77+
IsFuzzyKeyword bool // if false the levenshtein distance is 0
78+
7779
RepoIDs []int64 // repository IDs which the issues belong to
7880
AllPublic bool // if include all public repositories
7981

modules/indexer/issues/meilisearch/meilisearch.go

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package meilisearch
55

66
import (
77
"context"
8+
"errors"
89
"strconv"
910
"strings"
1011

@@ -16,12 +17,15 @@ import (
1617
)
1718

1819
const (
19-
issueIndexerLatestVersion = 2
20+
issueIndexerLatestVersion = 3
2021

2122
// TODO: make this configurable if necessary
2223
maxTotalHits = 10000
2324
)
2425

26+
// ErrMalformedResponse is never expected as we initialize the indexer ourselves and so define the types.
27+
var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content")
28+
2529
var _ internal.Indexer = &Indexer{}
2630

2731
// Indexer implements Indexer interface
@@ -47,6 +51,9 @@ func NewIndexer(url, apiKey, indexerName string) *Indexer {
4751
},
4852
DisplayedAttributes: []string{
4953
"id",
54+
"title",
55+
"content",
56+
"comments",
5057
},
5158
FilterableAttributes: []string{
5259
"repo_id",
@@ -221,11 +228,9 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
221228
return nil, err
222229
}
223230

224-
hits := make([]internal.Match, 0, len(searchRes.Hits))
225-
for _, hit := range searchRes.Hits {
226-
hits = append(hits, internal.Match{
227-
ID: int64(hit.(map[string]any)["id"].(float64)),
228-
})
231+
hits, err := nonFuzzyWorkaround(searchRes, options.Keyword, options.IsFuzzyKeyword)
232+
if err != nil {
233+
return nil, err
229234
}
230235

231236
return &internal.SearchResult{
@@ -241,3 +246,77 @@ func parseSortBy(sortBy internal.SortBy) string {
241246
}
242247
return field + ":asc"
243248
}
249+
250+
// nonFuzzyWorkaround is needed as meilisearch does not have an exact search
251+
// and you can only change "typo tolerance" per index. So we have to post-filter the results
252+
// https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance
253+
// TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed
254+
func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, isFuzzy bool) ([]internal.Match, error) {
255+
hits := make([]internal.Match, 0, len(searchRes.Hits))
256+
for _, hit := range searchRes.Hits {
257+
hit, ok := hit.(map[string]any)
258+
if !ok {
259+
return nil, ErrMalformedResponse
260+
}
261+
262+
if !isFuzzy {
263+
keyword = strings.ToLower(keyword)
264+
265+
// declare an anonymous func to check if the title, content or at least one comment contains the keyword
266+
found, err := func() (bool, error) {
267+
// check if title match first
268+
title, ok := hit["title"].(string)
269+
if !ok {
270+
return false, ErrMalformedResponse
271+
} else if strings.Contains(strings.ToLower(title), keyword) {
272+
return true, nil
273+
}
274+
275+
// check if content has a match
276+
content, ok := hit["content"].(string)
277+
if !ok {
278+
return false, ErrMalformedResponse
279+
} else if strings.Contains(strings.ToLower(content), keyword) {
280+
return true, nil
281+
}
282+
283+
// now check for each comment if one has a match
284+
// so we first try to cast and skip if there are no comments
285+
comments, ok := hit["comments"].([]any)
286+
if !ok {
287+
return false, ErrMalformedResponse
288+
} else if len(comments) == 0 {
289+
return false, nil
290+
}
291+
292+
// now we iterate over all and report as soon as we detect one match
293+
for i := range comments {
294+
comment, ok := comments[i].(string)
295+
if !ok {
296+
return false, ErrMalformedResponse
297+
}
298+
if strings.Contains(strings.ToLower(comment), keyword) {
299+
return true, nil
300+
}
301+
}
302+
303+
// we got no match
304+
return false, nil
305+
}()
306+
307+
if err != nil {
308+
return nil, err
309+
} else if !found {
310+
continue
311+
}
312+
}
313+
issueID, ok := hit["id"].(float64)
314+
if !ok {
315+
return nil, ErrMalformedResponse
316+
}
317+
hits = append(hits, internal.Match{
318+
ID: int64(issueID),
319+
})
320+
}
321+
return hits, nil
322+
}

modules/indexer/issues/meilisearch/meilisearch_test.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@ import (
1010
"testing"
1111
"time"
1212

13+
"code.gitea.io/gitea/modules/indexer/issues/internal"
1314
"code.gitea.io/gitea/modules/indexer/issues/internal/tests"
15+
16+
"github.com/meilisearch/meilisearch-go"
17+
"github.com/stretchr/testify/assert"
1418
)
1519

1620
func TestMeilisearchIndexer(t *testing.T) {
@@ -48,3 +52,44 @@ func TestMeilisearchIndexer(t *testing.T) {
4852

4953
tests.TestIndexer(t, indexer)
5054
}
55+
56+
func TestNonFuzzyWorkaround(t *testing.T) {
57+
// get unexpected return
58+
_, err := nonFuzzyWorkaround(&meilisearch.SearchResponse{
59+
Hits: []any{"aa", "bb", "cc", "dd"},
60+
}, "bowling", false)
61+
assert.ErrorIs(t, err, ErrMalformedResponse)
62+
63+
validResponse := &meilisearch.SearchResponse{
64+
Hits: []any{
65+
map[string]any{
66+
"id": float64(11),
67+
"title": "a title",
68+
"content": "issue body with no match",
69+
"comments": []any{"hey whats up?", "I'm currently bowling", "nice"},
70+
},
71+
map[string]any{
72+
"id": float64(22),
73+
"title": "Bowling as title",
74+
"content": "",
75+
"comments": []any{},
76+
},
77+
map[string]any{
78+
"id": float64(33),
79+
"title": "Bowl-ing as fuzzy match",
80+
"content": "",
81+
"comments": []any{},
82+
},
83+
},
84+
}
85+
86+
// nonFuzzy
87+
hits, err := nonFuzzyWorkaround(validResponse, "bowling", false)
88+
assert.NoError(t, err)
89+
assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}}, hits)
90+
91+
// fuzzy
92+
hits, err = nonFuzzyWorkaround(validResponse, "bowling", true)
93+
assert.NoError(t, err)
94+
assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits)
95+
}

0 commit comments

Comments
 (0)