Gitea source code — modules/zstd compression test file.
  1. // Copyright 2024 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package zstd
  4. import (
  5. "bytes"
  6. "io"
  7. "os"
  8. "path/filepath"
  9. "strings"
  10. "testing"
  11. "github.com/stretchr/testify/assert"
  12. "github.com/stretchr/testify/require"
  13. )
  14. func TestWriterReader(t *testing.T) {
  15. testData := prepareTestData(t, 1_000_000)
  16. result := bytes.NewBuffer(nil)
  17. t.Run("regular", func(t *testing.T) {
  18. result.Reset()
  19. writer, err := NewWriter(result)
  20. require.NoError(t, err)
  21. _, err = io.Copy(writer, bytes.NewReader(testData))
  22. require.NoError(t, err)
  23. require.NoError(t, writer.Close())
  24. t.Logf("original size: %d, compressed size: %d, rate: %.2f%%", len(testData), result.Len(), float64(result.Len())/float64(len(testData))*100)
  25. reader, err := NewReader(result)
  26. require.NoError(t, err)
  27. data, err := io.ReadAll(reader)
  28. require.NoError(t, err)
  29. require.NoError(t, reader.Close())
  30. assert.Equal(t, testData, data)
  31. })
  32. t.Run("with options", func(t *testing.T) {
  33. result.Reset()
  34. writer, err := NewWriter(result, WithEncoderLevel(SpeedBestCompression))
  35. require.NoError(t, err)
  36. _, err = io.Copy(writer, bytes.NewReader(testData))
  37. require.NoError(t, err)
  38. require.NoError(t, writer.Close())
  39. t.Logf("original size: %d, compressed size: %d, rate: %.2f%%", len(testData), result.Len(), float64(result.Len())/float64(len(testData))*100)
  40. reader, err := NewReader(result, WithDecoderLowmem(true))
  41. require.NoError(t, err)
  42. data, err := io.ReadAll(reader)
  43. require.NoError(t, err)
  44. require.NoError(t, reader.Close())
  45. assert.Equal(t, testData, data)
  46. })
  47. }
// TestSeekableWriterReader exercises the seekable zstd format: full round
// trips, random-access reads via Seek, block-size edge cases, and
// compatibility between the seekable and the regular stream formats.
func TestSeekableWriterReader(t *testing.T) {
	testData := prepareTestData(t, 2_000_000)
	result := bytes.NewBuffer(nil)
	// Plain round trip: seekable-compressed data must decompress back to the input.
	t.Run("regular", func(t *testing.T) {
		result.Reset()
		blockSize := 100_000
		writer, err := NewSeekableWriter(result, blockSize)
		require.NoError(t, err)
		_, err = io.Copy(writer, bytes.NewReader(testData))
		require.NoError(t, err)
		require.NoError(t, writer.Close())
		t.Logf("original size: %d, compressed size: %d, rate: %.2f%%", len(testData), result.Len(), float64(result.Len())/float64(len(testData))*100)
		reader, err := NewSeekableReader(bytes.NewReader(result.Bytes()))
		require.NoError(t, err)
		data, err := io.ReadAll(reader)
		require.NoError(t, err)
		require.NoError(t, reader.Close())
		assert.Equal(t, testData, data)
	})
	// Random access: seek into the middle of the data and read a small window,
	// then assert on how the underlying reader was used via assertReadSeeker.
	t.Run("seek read", func(t *testing.T) {
		result.Reset()
		blockSize := 100_000
		writer, err := NewSeekableWriter(result, blockSize)
		require.NoError(t, err)
		_, err = io.Copy(writer, bytes.NewReader(testData))
		require.NoError(t, err)
		require.NoError(t, writer.Close())
		t.Logf("original size: %d, compressed size: %d, rate: %.2f%%", len(testData), result.Len(), float64(result.Len())/float64(len(testData))*100)
		assertReader := &assertReadSeeker{r: bytes.NewReader(result.Bytes())}
		reader, err := NewSeekableReader(assertReader)
		require.NoError(t, err)
		_, err = reader.Seek(1_000_000, io.SeekStart)
		require.NoError(t, err)
		data := make([]byte, 1000)
		_, err = io.ReadFull(reader, data)
		require.NoError(t, err)
		require.NoError(t, reader.Close())
		assert.Equal(t, testData[1_000_000:1_000_000+1000], data)
		// Should seek 3 times,
		// the first two times are for getting the index,
		// and the third time is for reading the data.
		assert.Equal(t, 3, assertReader.SeekTimes)
		// Should read less than 2 blocks,
		// even if the compression ratio is not good and the data is not in the same block.
		assert.Less(t, assertReader.ReadBytes, blockSize*2)
		// Should close the underlying reader if it is Closer.
		assert.True(t, assertReader.Closed)
	})
	// Input smaller than one block must still round-trip correctly.
	t.Run("tidy data", func(t *testing.T) {
		testData := prepareTestData(t, 1000) // data size is less than a block
		result.Reset()
		blockSize := 100_000
		writer, err := NewSeekableWriter(result, blockSize)
		require.NoError(t, err)
		_, err = io.Copy(writer, bytes.NewReader(testData))
		require.NoError(t, err)
		require.NoError(t, writer.Close())
		t.Logf("original size: %d, compressed size: %d, rate: %.2f%%", len(testData), result.Len(), float64(result.Len())/float64(len(testData))*100)
		reader, err := NewSeekableReader(bytes.NewReader(result.Bytes()))
		require.NoError(t, err)
		data, err := io.ReadAll(reader)
		require.NoError(t, err)
		require.NoError(t, reader.Close())
		assert.Equal(t, testData, data)
	})
	// A tiny block size still round-trips, though compression is counterproductive.
	t.Run("tidy block", func(t *testing.T) {
		result.Reset()
		blockSize := 100
		writer, err := NewSeekableWriter(result, blockSize)
		require.NoError(t, err)
		_, err = io.Copy(writer, bytes.NewReader(testData))
		require.NoError(t, err)
		require.NoError(t, writer.Close())
		t.Logf("original size: %d, compressed size: %d, rate: %.2f%%", len(testData), result.Len(), float64(result.Len())/float64(len(testData))*100)
		// A too small block size will cause a bad compression rate,
		// even the compressed data is larger than the original data.
		assert.Greater(t, result.Len(), len(testData))
		reader, err := NewSeekableReader(bytes.NewReader(result.Bytes()))
		require.NoError(t, err)
		data, err := io.ReadAll(reader)
		require.NoError(t, err)
		require.NoError(t, reader.Close())
		assert.Equal(t, testData, data)
	})
	// Seekable output must remain a valid regular zstd stream.
	t.Run("compatible reader", func(t *testing.T) {
		result.Reset()
		blockSize := 100_000
		writer, err := NewSeekableWriter(result, blockSize)
		require.NoError(t, err)
		_, err = io.Copy(writer, bytes.NewReader(testData))
		require.NoError(t, err)
		require.NoError(t, writer.Close())
		t.Logf("original size: %d, compressed size: %d, rate: %.2f%%", len(testData), result.Len(), float64(result.Len())/float64(len(testData))*100)
		// It should be able to read the data with a regular reader.
		reader, err := NewReader(bytes.NewReader(result.Bytes()))
		require.NoError(t, err)
		data, err := io.ReadAll(reader)
		require.NoError(t, err)
		require.NoError(t, reader.Close())
		assert.Equal(t, testData, data)
	})
	// The reverse is not true: a plain stream has no seek index,
	// so the seekable reader must reject it.
	t.Run("wrong reader", func(t *testing.T) {
		result.Reset()
		// Use a regular writer to compress the data.
		writer, err := NewWriter(result)
		require.NoError(t, err)
		_, err = io.Copy(writer, bytes.NewReader(testData))
		require.NoError(t, err)
		require.NoError(t, writer.Close())
		t.Logf("original size: %d, compressed size: %d, rate: %.2f%%", len(testData), result.Len(), float64(result.Len())/float64(len(testData))*100)
		// But use a seekable reader to read the data, it should fail.
		_, err = NewSeekableReader(bytes.NewReader(result.Bytes()))
		require.Error(t, err)
	})
}
  163. // prepareTestData prepares test data to test compression.
  164. // Random data is not suitable for testing compression,
  165. // so it collects code files from the project to get enough data.
  166. func prepareTestData(t *testing.T, size int) []byte {
  167. // .../gitea/modules/zstd
  168. dir, err := os.Getwd()
  169. require.NoError(t, err)
  170. // .../gitea/
  171. dir = filepath.Join(dir, "../../")
  172. textExt := []string{".go", ".tmpl", ".ts", ".yml", ".css"} // add more if not enough data collected
  173. isText := func(info os.FileInfo) bool {
  174. if info.Size() == 0 {
  175. return false
  176. }
  177. for _, ext := range textExt {
  178. if strings.HasSuffix(info.Name(), ext) {
  179. return true
  180. }
  181. }
  182. return false
  183. }
  184. ret := make([]byte, size)
  185. n := 0
  186. count := 0
  187. queue := []string{dir}
  188. for len(queue) > 0 && n < size {
  189. file := queue[0]
  190. queue = queue[1:]
  191. info, err := os.Stat(file)
  192. require.NoError(t, err)
  193. if info.IsDir() {
  194. entries, err := os.ReadDir(file)
  195. require.NoError(t, err)
  196. for _, entry := range entries {
  197. queue = append(queue, filepath.Join(file, entry.Name()))
  198. }
  199. continue
  200. }
  201. if !isText(info) { // text file only
  202. continue
  203. }
  204. data, err := os.ReadFile(file)
  205. require.NoError(t, err)
  206. n += copy(ret[n:], data)
  207. count++
  208. }
  209. if n < size {
  210. require.Failf(t, "Not enough data", "Only %d bytes collected from %d files", n, count)
  211. }
  212. return ret
  213. }
  214. type assertReadSeeker struct {
  215. r io.ReadSeeker
  216. SeekTimes int
  217. ReadBytes int
  218. Closed bool
  219. }
  220. func (a *assertReadSeeker) Read(p []byte) (int, error) {
  221. n, err := a.r.Read(p)
  222. a.ReadBytes += n
  223. return n, err
  224. }
  225. func (a *assertReadSeeker) Seek(offset int64, whence int) (int64, error) {
  226. a.SeekTimes++
  227. return a.r.Seek(offset, whence)
  228. }
  229. func (a *assertReadSeeker) Close() error {
  230. a.Closed = true
  231. return nil
  232. }