Gitea source code
  1. // Copyright 2021 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package csv
  4. import (
  5. "bytes"
  6. "encoding/csv"
  7. "io"
  8. "strconv"
  9. "strings"
  10. "testing"
  11. "code.gitea.io/gitea/modules/markup"
  12. "code.gitea.io/gitea/modules/translation"
  13. "github.com/stretchr/testify/assert"
  14. )
  15. func TestCreateReader(t *testing.T) {
  16. rd := CreateReader(bytes.NewReader([]byte{}), ',')
  17. assert.Equal(t, ',', rd.Comma)
  18. }
  19. func decodeSlashes(t *testing.T, s string) string {
  20. s = strings.ReplaceAll(s, "\n", "\\n")
  21. s = strings.ReplaceAll(s, "\"", "\\\"")
  22. decoded, err := strconv.Unquote(`"` + s + `"`)
  23. assert.NoError(t, err, "unable to decode string")
  24. return decoded
  25. }
  26. func TestCreateReaderAndDetermineDelimiter(t *testing.T) {
  27. cases := []struct {
  28. csv string
  29. expectedRows [][]string
  30. expectedDelimiter rune
  31. }{
  32. // case 0 - semicolon delimited
  33. {
  34. csv: `a;b;c
  35. 1;2;3
  36. 4;5;6`,
  37. expectedRows: [][]string{
  38. {"a", "b", "c"},
  39. {"1", "2", "3"},
  40. {"4", "5", "6"},
  41. },
  42. expectedDelimiter: ';',
  43. },
  44. // case 1 - tab delimited with empty fields
  45. {
  46. csv: `col1 col2 col3
  47. a, b c
  48. e f
  49. g h i
  50. j l
  51. m n,\t
  52. p q r
  53. u
  54. v w x
  55. y\t\t
  56. `,
  57. expectedRows: [][]string{
  58. {"col1", "col2", "col3"},
  59. {"a,", "b", "c"},
  60. {"", "e", "f"},
  61. {"g", "h", "i"},
  62. {"j", "", "l"},
  63. {"m", "n,", ""},
  64. {"p", "q", "r"},
  65. {"", "", "u"},
  66. {"v", "w", "x"},
  67. {"y", "", ""},
  68. {"", "", ""},
  69. },
  70. expectedDelimiter: '\t',
  71. },
  72. // case 2 - comma delimited with leading spaces
  73. {
  74. csv: ` col1,col2,col3
  75. a, b, c
  76. d,e,f
  77. ,h, i
  78. j, ,\x20
  79. , , `,
  80. expectedRows: [][]string{
  81. {"col1", "col2", "col3"},
  82. {"a", "b", "c"},
  83. {"d", "e", "f"},
  84. {"", "h", "i"},
  85. {"j", "", ""},
  86. {"", "", ""},
  87. },
  88. expectedDelimiter: ',',
  89. },
  90. // case 3 - every delimiter used, default to comma and handle differing number of fields per record
  91. {
  92. csv: `col1,col2
  93. a;b
  94. c@e
  95. f g
  96. h|i
  97. jkl`,
  98. expectedRows: [][]string{
  99. {"col1", "col2"},
  100. {"a;b"},
  101. {"c@e"},
  102. {"f g"},
  103. {"h|i"},
  104. {"jkl"},
  105. },
  106. expectedDelimiter: ',',
  107. },
  108. }
  109. for n, c := range cases {
  110. rd, err := CreateReaderAndDetermineDelimiter(nil, strings.NewReader(decodeSlashes(t, c.csv)))
  111. assert.NoError(t, err, "case %d: should not throw error: %v\n", n, err)
  112. assert.Equal(t, c.expectedDelimiter, rd.Comma, "case %d: delimiter should be '%c', got '%c'", n, c.expectedDelimiter, rd.Comma)
  113. rows, err := rd.ReadAll()
  114. assert.NoError(t, err, "case %d: should not throw error: %v\n", n, err)
  115. assert.Equal(t, c.expectedRows, rows, "case %d: rows should be equal", n)
  116. }
  117. }
  118. type mockReader struct{}
  119. func (r *mockReader) Read(buf []byte) (int, error) {
  120. return 0, io.ErrShortBuffer
  121. }
  122. func TestDetermineDelimiterShortBufferError(t *testing.T) {
  123. rd, err := CreateReaderAndDetermineDelimiter(nil, &mockReader{})
  124. assert.Error(t, err, "CreateReaderAndDetermineDelimiter() should throw an error")
  125. assert.ErrorIs(t, err, io.ErrShortBuffer)
  126. assert.Nil(t, rd, "CSV reader should be mnil")
  127. }
  128. func TestDetermineDelimiter(t *testing.T) {
  129. cases := []struct {
  130. csv string
  131. filename string
  132. expectedDelimiter rune
  133. }{
  134. // case 0 - semicolon delmited
  135. {
  136. csv: "a",
  137. filename: "test.csv",
  138. expectedDelimiter: ',',
  139. },
  140. // case 1 - single column/row CSV
  141. {
  142. csv: "a",
  143. filename: "",
  144. expectedDelimiter: ',',
  145. },
  146. // case 2 - single column, single row CSV w/ tsv file extension (so is tabbed delimited)
  147. {
  148. csv: "1,2",
  149. filename: "test.tsv",
  150. expectedDelimiter: '\t',
  151. },
  152. // case 3 - two column, single row CSV w/ no filename, so will guess comma as delimiter
  153. {
  154. csv: "1,2",
  155. filename: "",
  156. expectedDelimiter: ',',
  157. },
  158. // case 4 - semi-colon delimited with csv extension
  159. {
  160. csv: "1;2",
  161. filename: "test.csv",
  162. expectedDelimiter: ';',
  163. },
  164. // case 5 - tabbed delimited with tsv extension
  165. {
  166. csv: "1\t2",
  167. filename: "test.tsv",
  168. expectedDelimiter: '\t',
  169. },
  170. // case 6 - tabbed delimited without any filename
  171. {
  172. csv: "1\t2",
  173. filename: "",
  174. expectedDelimiter: '\t',
  175. },
  176. // case 7 - tabs won't work, only commas as every row has same amount of commas
  177. {
  178. csv: "col1,col2\nfirst\tval,seconed\tval",
  179. filename: "",
  180. expectedDelimiter: ',',
  181. },
  182. // case 8 - While looks like comma delimited, has psv extension
  183. {
  184. csv: "1,2",
  185. filename: "test.psv",
  186. expectedDelimiter: '|',
  187. },
  188. // case 9 - pipe delmiited with no extension
  189. {
  190. csv: "1|2",
  191. filename: "",
  192. expectedDelimiter: '|',
  193. },
  194. // case 10 - semi-colon delimited with commas in values
  195. {
  196. csv: "1,2,3;4,5,6;7,8,9\na;b;c",
  197. filename: "",
  198. expectedDelimiter: ';',
  199. },
  200. // case 11 - semi-colon delimited with newline in content
  201. {
  202. csv: `"1,2,3,4";"a
  203. b";%
  204. c;d;#`,
  205. filename: "",
  206. expectedDelimiter: ';',
  207. },
  208. // case 12 - HTML as single value
  209. {
  210. csv: "<br/>",
  211. filename: "",
  212. expectedDelimiter: ',',
  213. },
  214. // case 13 - tab delimited with commas in values
  215. {
  216. csv: `name email note
  217. John Doe john@doe.com This,note,had,a,lot,of,commas,to,test,delimiters`,
  218. filename: "",
  219. expectedDelimiter: '\t',
  220. },
  221. }
  222. for n, c := range cases {
  223. delimiter := determineDelimiter(markup.NewRenderContext(t.Context()).WithRelativePath(c.filename), []byte(decodeSlashes(t, c.csv)))
  224. assert.Equal(t, c.expectedDelimiter, delimiter, "case %d: delimiter should be equal, expected '%c' got '%c'", n, c.expectedDelimiter, delimiter)
  225. }
  226. }
  227. func TestRemoveQuotedString(t *testing.T) {
  228. cases := []struct {
  229. text string
  230. expectedText string
  231. }{
  232. // case 0 - quoted text with escaped quotes in 1st column
  233. {
  234. text: `col1,col2,col3
  235. "quoted ""text"" with
  236. new lines
  237. in first column",b,c`,
  238. expectedText: `col1,col2,col3
  239. ,b,c`,
  240. },
  241. // case 1 - quoted text with escaped quotes in 2nd column
  242. {
  243. text: `col1,col2,col3
  244. a,"quoted ""text"" with
  245. new lines
  246. in second column",c`,
  247. expectedText: `col1,col2,col3
  248. a,,c`,
  249. },
  250. // case 2 - quoted text with escaped quotes in last column
  251. {
  252. text: `col1,col2,col3
  253. a,b,"quoted ""text"" with
  254. new lines
  255. in last column"`,
  256. expectedText: `col1,col2,col3
  257. a,b,`,
  258. },
  259. // case 3 - csv with lots of quotes
  260. {
  261. text: `a,"b",c,d,"e
  262. e
  263. e",f
  264. a,bb,c,d,ee ,"f
  265. f"
  266. a,b,"c ""
  267. c",d,e,f`,
  268. expectedText: `a,,c,d,,f
  269. a,bb,c,d,ee ,
  270. a,b,,d,e,f`,
  271. },
  272. // case 4 - csv with pipes and quotes
  273. {
  274. text: `Col1 | Col2 | Col3
  275. abc | "Hello
  276. World"|123
  277. "de
  278. f" | 4.56 | 789`,
  279. expectedText: `Col1 | Col2 | Col3
  280. abc | |123
  281. | 4.56 | 789`,
  282. },
  283. }
  284. for n, c := range cases {
  285. modifiedText := removeQuotedString(decodeSlashes(t, c.text))
  286. assert.Equal(t, c.expectedText, modifiedText, "case %d: modified text should be equal", n)
  287. }
  288. }
  289. func TestGuessDelimiter(t *testing.T) {
  290. cases := []struct {
  291. csv string
  292. expectedDelimiter rune
  293. }{
  294. // case 0 - single cell, comma delmited
  295. {
  296. csv: "a",
  297. expectedDelimiter: ',',
  298. },
  299. // case 1 - two cells, comma delimited
  300. {
  301. csv: "1,2",
  302. expectedDelimiter: ',',
  303. },
  304. // case 2 - semicolon delimited
  305. {
  306. csv: "1;2",
  307. expectedDelimiter: ';',
  308. },
  309. // case 3 - tab delimited
  310. {
  311. csv: "1\t2",
  312. expectedDelimiter: '\t',
  313. },
  314. // case 4 - pipe delimited
  315. {
  316. csv: "1|2",
  317. expectedDelimiter: '|',
  318. },
  319. // case 5 - semicolon delimited with commas in text
  320. {
  321. csv: `1,2,3;4,5,6;7,8,9
  322. a;b;c`,
  323. expectedDelimiter: ';',
  324. },
  325. // case 6 - semicolon delmited with commas in quoted text
  326. {
  327. csv: `"1,2,3,4";"a
  328. b"
  329. c;d`,
  330. expectedDelimiter: ';',
  331. },
  332. // case 7 - HTML
  333. {
  334. csv: "<br/>",
  335. expectedDelimiter: ',',
  336. },
  337. // case 8 - tab delimited with commas in value
  338. {
  339. csv: `name email note
  340. John Doe john@doe.com This,note,had,a,lot,of,commas,to,test,delimiters`,
  341. expectedDelimiter: '\t',
  342. },
  343. // case 9 - tab delimited with new lines in values, commas in values
  344. {
  345. csv: `1 "some,""more
  346. ""
  347. quoted,
  348. text," a
  349. 2 "some,
  350. quoted,\t
  351. text," b
  352. 3 "some,
  353. quoted,
  354. text" c
  355. 4 "some,
  356. quoted,
  357. text," d`,
  358. expectedDelimiter: '\t',
  359. },
  360. // case 10 - semicolon delmited with quotes and semicolon in value
  361. {
  362. csv: `col1;col2
  363. "this has a literal "" in the text";"and an ; in the text"`,
  364. expectedDelimiter: ';',
  365. },
  366. // case 11 - pipe delimited with quotes
  367. {
  368. csv: `Col1 | Col2 | Col3
  369. abc | "Hello
  370. World"|123
  371. "de
  372. |
  373. f" | 4.56 | 789`,
  374. expectedDelimiter: '|',
  375. },
  376. // case 12 - a tab delimited 6 column CSV, but the values are not quoted and have lots of commas.
  377. // In the previous bestScore algorithm, this would have picked comma as the delimiter, but now it should guess tab
  378. {
  379. csv: `c1 c2 c3 c4 c5 c6
  380. v,k,x,v ym,f,oa,qn,uqijh,n,s,wvygpo uj,kt,j,w,i,fvv,tm,f,ddt,b,mwt,e,t,teq,rd,p,a e,wfuae,t,h,q,im,ix,y h,mrlu,l,dz,ff,zi,af,emh ,gov,bmfelvb,axp,f,u,i,cni,x,z,v,sh,w,jo,,m,h
  381. k,ohf,pgr,tde,m,s te,ek,,v,,ic,kqc,dv,w,oi,j,w,gojjr,ug,,l,j,zl g,qziq,bcajx,zfow,ka,j,re,ohbc k,nzm,qm,ts,auf th,elb,lx,l,q,e,qf asbr,z,k,y,tltobga
  382. g,m,bu,el h,l,jwi,o,wge,fy,rure,c,g,lcxu,fxte,uns,cl,s,o,t,h,rsoy,f bq,s,uov,z,ikkhgyg,,sabs,c,hzue mc,b,,j,t,n sp,mn,,m,t,dysi,eq,pigb,rfa,z w,rfli,sg,,o,wjjjf,f,wxdzfk,x,t,p,zy,p,mg,r,l,h
  383. e,ewbkc,nugd,jj,sf,ih,i,n,jo,b,poem,kw,q,i,x,t,e,uug,k j,xm,sch,ux,h,,fb,f,pq,,mh,,f,v,,oba,w,h,v,eiz,yzd,o,a,c,e,dhp,q a,pbef,epc,k,rdpuw,cw k,j,e,d xf,dz,sviv,w,sqnzew,t,b v,yg,f,cq,ti,g,m,ta,hm,ym,ii,hxy,p,z,r,e,ga,sfs,r,p,l,aar,w,kox,j
  384. l,d,v,pp,q,j,bxip,w,i,im,qa,o e,o h,w,a,a,qzj,nt,qfn,ut,fvhu,ts hu,q,g,p,q,ofpje,fsqa,frp,p,vih,j,w,k,jx, ln,th,ka,l,b,vgk,rv,hkx rj,v,y,cwm,rao,e,l,wvr,ptc,lm,yg,u,k,i,b,zk,b,gv,fls
  385. velxtnhlyuysbnlchosqlhkozkdapjaueexjwrndwb nglvnv kqiv pbshwlmcexdzipopxjyrxhvjalwp pydvipwlkkpdvbtepahskwuornbsb qwbacgq
  386. l,y,u,bf,y,m,eals,n,cop,h,g,vs,jga,opt x,b,zwmn,hh,b,n,pdj,t,d px yn,vtd,u,y,b,ps,yo,qqnem,mxg,m,al,rd,c,k,d,q,f ilxdxa,m,y,,p,p,y,prgmg,q,n,etj,k,ns b,pl,z,jq,hk
  387. p,gc jn,mzr,bw sb,e,r,dy,ur,wzy,r,c,n,yglr,jbdu,r,pqk,k q,d,,,p,l,euhl,dc,rwh,t,tq,z,h,p,s,t,x,fugr,h wi,zxb,jcig,o,t,k mfh,ym,h,e,p,cnvx,uv,zx,x,pq,blt,v,r,u,tr,g,g,xt
  388. nri,p,,t,if,,y,ptlqq a,i w,ovli,um,w,f,re,k,sb,w,jy,zf i,g,p,q,mii,nr,jm,cc i,szl,k,eg,l,d ,ah,w,b,vh
  389. ,,sh,wx,mn,xm,u,d,yy,u,t,m,j,s,b ogadq,g,y,y,i,h,ln,jda,g,cz,s,rv,r,s,s,le,r, y,nu,f,nagj o,h,,adfy,o,nf,ns,gvsvnub,k,b,xyz v,h,g,ef,y,gb c,x,cw,x,go,h,t,x,cu,u,qgrqzrcmn,kq,cd,g,rejp,zcq
  390. skxg,t,vay,d,wug,d,xg,sexc rt g,ag,mjq,fjnyji,iwa,m,ml,b,ua,b,qjxeoc be,s,sh,n,jbzxs,g,n,i,h,y,r,be,mfo,u,p cw,r,,u,zn,eg,r,yac,m,l,edkr,ha,x,g,b,c,tg,c j,ye,u,ejd,maj,ea,bm,u,iy`,
  391. expectedDelimiter: '\t',
  392. },
  393. // case 13 - a CSV with more than 10 lines and since we only use the first 10 lines, it should still get the delimiter as semicolon
  394. {
  395. csv: `col1;col2;col3
  396. 1;1;1
  397. 2;2;2
  398. 3;3;3
  399. 4;4;4
  400. 5;5;5
  401. 6;6;6
  402. 7;7;7
  403. 8;8;8
  404. 9;9;9
  405. 10;10;10
  406. 11 11 11
  407. 12|12|12`,
  408. expectedDelimiter: ';',
  409. },
  410. // case 14 - a really long single line (over 10k) that will get truncated, but since it has commas and semicolons (but more semicolons) it will pick semicolon
  411. {
  412. csv: strings.Repeat("a;b,c;", 1700),
  413. expectedDelimiter: ';',
  414. },
  415. // case 15 - 2 lines that are well over 10k, but since the 2nd line is where this CSV will be truncated (10k sample), it will only use the first line, so semicolon will be picked
  416. {
  417. csv: "col1@col2@col3\na@b@" + strings.Repeat("c", 6000) + "\nd,e," + strings.Repeat("f", 4000),
  418. expectedDelimiter: '@',
  419. },
  420. // case 16 - has all delimiters so should return comma
  421. {
  422. csv: `col1,col2;col3@col4|col5 col6
  423. a b|c@d;e,f`,
  424. expectedDelimiter: ',',
  425. },
  426. // case 16 - nothing works (bad csv) so returns comma by default
  427. {
  428. csv: `col1,col2
  429. a;b
  430. c@e
  431. f g
  432. h|i
  433. jkl`,
  434. expectedDelimiter: ',',
  435. },
  436. }
  437. for n, c := range cases {
  438. delimiter := guessDelimiter([]byte(decodeSlashes(t, c.csv)))
  439. assert.Equal(t, c.expectedDelimiter, delimiter, "case %d: delimiter should be equal, expected '%c' got '%c'", n, c.expectedDelimiter, delimiter)
  440. }
  441. }
  442. func TestGuessFromBeforeAfterQuotes(t *testing.T) {
  443. cases := []struct {
  444. csv string
  445. expectedDelimiter rune
  446. }{
  447. // case 0 - tab delimited with new lines in values, commas in values
  448. {
  449. csv: `1 "some,""more
  450. ""
  451. quoted,
  452. text," a
  453. 2 "some,
  454. quoted,\t
  455. text," b
  456. 3 "some,
  457. quoted,
  458. text" c
  459. 4 "some,
  460. quoted,
  461. text," d`,
  462. expectedDelimiter: '\t',
  463. },
  464. // case 1 - semicolon delmited with quotes and semicolon in value
  465. {
  466. csv: `col1;col2
  467. "this has a literal "" in the text";"and an ; in the text"`,
  468. expectedDelimiter: ';',
  469. },
  470. // case 2 - pipe delimited with quotes
  471. {
  472. csv: `Col1 | Col2 | Col3
  473. abc | "Hello
  474. World"|123
  475. "de
  476. |
  477. f" | 4.56 | 789`,
  478. expectedDelimiter: '|',
  479. },
  480. // case 3 - a complicated quoted CSV that is semicolon delmiited
  481. {
  482. csv: `he; she
  483. "he said, ""hey!"""; "she said, ""hey back!"""
  484. but; "be"`,
  485. expectedDelimiter: ';',
  486. },
  487. // case 4 - no delimiter should be found
  488. {
  489. csv: `a,b`,
  490. expectedDelimiter: 0,
  491. },
  492. // case 5 - no limiter should be found
  493. {
  494. csv: `col1
  495. "he said, ""here I am"""`,
  496. expectedDelimiter: 0,
  497. },
  498. // case 6 - delimiter before double quoted string with space
  499. {
  500. csv: `col1|col2
  501. a| "he said, ""here I am"""`,
  502. expectedDelimiter: '|',
  503. },
  504. // case 7 - delimiter before double quoted string without space
  505. {
  506. csv: `col1|col2
  507. a|"he said, ""here I am"""`,
  508. expectedDelimiter: '|',
  509. },
  510. // case 8 - delimiter after double quoted string with space
  511. {
  512. csv: `col1, col2
  513. "abc\n
  514. ", def`,
  515. expectedDelimiter: ',',
  516. },
  517. // case 9 - delimiter after double quoted string without space
  518. {
  519. csv: `col1,col2
  520. "abc\n
  521. ",def`,
  522. expectedDelimiter: ',',
  523. },
  524. }
  525. for n, c := range cases {
  526. delimiter := guessFromBeforeAfterQuotes([]byte(decodeSlashes(t, c.csv)))
  527. assert.Equal(t, c.expectedDelimiter, delimiter, "case %d: delimiter should be equal, expected '%c' got '%c'", n, c.expectedDelimiter, delimiter)
  528. }
  529. }
  530. func TestFormatError(t *testing.T) {
  531. cases := []struct {
  532. err error
  533. expectedMessage string
  534. expectsError bool
  535. }{
  536. {
  537. err: &csv.ParseError{
  538. Err: csv.ErrFieldCount,
  539. },
  540. expectedMessage: "repo.error.csv.invalid_field_count:0",
  541. expectsError: false,
  542. },
  543. {
  544. err: &csv.ParseError{
  545. Err: csv.ErrBareQuote,
  546. },
  547. expectedMessage: "repo.error.csv.unexpected:0,0",
  548. expectsError: false,
  549. },
  550. {
  551. err: bytes.ErrTooLarge,
  552. expectsError: true,
  553. },
  554. }
  555. for n, c := range cases {
  556. message, err := FormatError(c.err, &translation.MockLocale{})
  557. if c.expectsError {
  558. assert.Error(t, err, "case %d: expected an error to be returned", n)
  559. } else {
  560. assert.NoError(t, err, "case %d: no error was expected, got error: %v", n, err)
  561. assert.Equal(t, c.expectedMessage, message, "case %d: messages should be equal, expected '%s' got '%s'", n, c.expectedMessage, message)
  562. }
  563. }
  564. }