lzo1x_compress.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577
  1. /*
  2. * LZO1X Compressor from LZO
  3. *
  4. * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
  5. *
  6. * The full LZO package can be found at:
  7. * http://www.oberhumer.com/opensource/lzo/
  8. *
  9. * Changed for Linux kernel use by:
  10. * Nitin Gupta <nitingupta910@gmail.com>
  11. * Richard Purdie <rpurdie@openedhand.com>
  12. */
  13. #include <linux/module.h>
  14. #include <linux/kernel.h>
  15. #include <asm/unaligned.h>
  16. #include <linux/lzo.h>
  17. #include "lzodefs.h"
  18. static noinline size_t
  19. lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
  20. unsigned char *out, size_t *out_len,
  21. size_t ti, void *wrkmem)
  22. {
  23. const unsigned char *ip;
  24. unsigned char *op;
  25. const unsigned char * const in_end = in + in_len;
  26. const unsigned char * const ip_end = in + in_len - 20;
  27. const unsigned char *ii;
  28. lzo_dict_t * const dict = (lzo_dict_t *) wrkmem;
  29. op = out;
  30. ip = in;
  31. ii = ip;
  32. ip += ti < 4 ? 4 - ti : 0;
  33. for (;;) {
  34. const unsigned char *m_pos;
  35. size_t t, m_len, m_off;
  36. u32 dv;
  37. literal:
  38. ip += 1 + ((ip - ii) >> 5);
  39. next:
  40. if (unlikely(ip >= ip_end))
  41. break;
  42. dv = get_unaligned_le32(ip);
  43. t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
  44. m_pos = in + dict[t];
  45. dict[t] = (lzo_dict_t) (ip - in);
  46. if (unlikely(dv != get_unaligned_le32(m_pos)))
  47. goto literal;
  48. ii -= ti;
  49. ti = 0;
  50. t = ip - ii;
  51. if (t != 0) {
  52. if (t <= 3) {
  53. op[-2] |= t;
  54. COPY4(op, ii);
  55. op += t;
  56. } else if (t <= 16) {
  57. *op++ = (t - 3);
  58. COPY8(op, ii);
  59. COPY8(op + 8, ii + 8);
  60. op += t;
  61. } else {
  62. if (t <= 18) {
  63. *op++ = (t - 3);
  64. } else {
  65. size_t tt = t - 18;
  66. *op++ = 0;
  67. while (unlikely(tt > 255)) {
  68. tt -= 255;
  69. *op++ = 0;
  70. }
  71. *op++ = tt;
  72. }
  73. do {
  74. COPY8(op, ii);
  75. COPY8(op + 8, ii + 8);
  76. op += 16;
  77. ii += 16;
  78. t -= 16;
  79. } while (t >= 16);
  80. if (t > 0) do {
  81. *op++ = *ii++;
  82. } while (--t > 0);
  83. }
  84. }
  85. m_len = 4;
  86. {
  87. #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
  88. u64 v;
  89. v = get_unaligned((const u64 *) (ip + m_len)) ^
  90. get_unaligned((const u64 *) (m_pos + m_len));
  91. if (unlikely(v == 0)) {
  92. do {
  93. m_len += 8;
  94. v = get_unaligned((const u64 *) (ip + m_len)) ^
  95. get_unaligned((const u64 *) (m_pos + m_len));
  96. if (unlikely(ip + m_len >= ip_end))
  97. goto m_len_done;
  98. } while (v == 0);
  99. }
  100. # if defined(__LITTLE_ENDIAN)
  101. m_len += (unsigned) __builtin_ctzll(v) / 8;
  102. # elif defined(__BIG_ENDIAN)
  103. m_len += (unsigned) __builtin_clzll(v) / 8;
  104. # else
  105. # error "missing endian definition"
  106. # endif
  107. #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32)
  108. u32 v;
  109. v = get_unaligned((const u32 *) (ip + m_len)) ^
  110. get_unaligned((const u32 *) (m_pos + m_len));
  111. if (unlikely(v == 0)) {
  112. do {
  113. m_len += 4;
  114. v = get_unaligned((const u32 *) (ip + m_len)) ^
  115. get_unaligned((const u32 *) (m_pos + m_len));
  116. if (v != 0)
  117. break;
  118. m_len += 4;
  119. v = get_unaligned((const u32 *) (ip + m_len)) ^
  120. get_unaligned((const u32 *) (m_pos + m_len));
  121. if (unlikely(ip + m_len >= ip_end))
  122. goto m_len_done;
  123. } while (v == 0);
  124. }
  125. # if defined(__LITTLE_ENDIAN)
  126. m_len += (unsigned) __builtin_ctz(v) / 8;
  127. # elif defined(__BIG_ENDIAN)
  128. m_len += (unsigned) __builtin_clz(v) / 8;
  129. # else
  130. # error "missing endian definition"
  131. # endif
  132. #else
  133. if (unlikely(ip[m_len] == m_pos[m_len])) {
  134. do {
  135. m_len += 1;
  136. if (ip[m_len] != m_pos[m_len])
  137. break;
  138. m_len += 1;
  139. if (ip[m_len] != m_pos[m_len])
  140. break;
  141. m_len += 1;
  142. if (ip[m_len] != m_pos[m_len])
  143. break;
  144. m_len += 1;
  145. if (ip[m_len] != m_pos[m_len])
  146. break;
  147. m_len += 1;
  148. if (ip[m_len] != m_pos[m_len])
  149. break;
  150. m_len += 1;
  151. if (ip[m_len] != m_pos[m_len])
  152. break;
  153. m_len += 1;
  154. if (ip[m_len] != m_pos[m_len])
  155. break;
  156. m_len += 1;
  157. if (unlikely(ip + m_len >= ip_end))
  158. goto m_len_done;
  159. } while (ip[m_len] == m_pos[m_len]);
  160. }
  161. #endif
  162. }
  163. m_len_done:
  164. m_off = ip - m_pos;
  165. ip += m_len;
  166. ii = ip;
  167. if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
  168. m_off -= 1;
  169. *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
  170. *op++ = (m_off >> 3);
  171. } else if (m_off <= M3_MAX_OFFSET) {
  172. m_off -= 1;
  173. if (m_len <= M3_MAX_LEN)
  174. *op++ = (M3_MARKER | (m_len - 2));
  175. else {
  176. m_len -= M3_MAX_LEN;
  177. *op++ = M3_MARKER | 0;
  178. while (unlikely(m_len > 255)) {
  179. m_len -= 255;
  180. *op++ = 0;
  181. }
  182. *op++ = (m_len);
  183. }
  184. *op++ = (m_off << 2);
  185. *op++ = (m_off >> 6);
  186. } else {
  187. m_off -= 0x4000;
  188. if (m_len <= M4_MAX_LEN)
  189. *op++ = (M4_MARKER | ((m_off >> 11) & 8)
  190. | (m_len - 2));
  191. else {
  192. m_len -= M4_MAX_LEN;
  193. *op++ = (M4_MARKER | ((m_off >> 11) & 8));
  194. while (unlikely(m_len > 255)) {
  195. m_len -= 255;
  196. *op++ = 0;
  197. }
  198. *op++ = (m_len);
  199. }
  200. *op++ = (m_off << 2);
  201. *op++ = (m_off >> 6);
  202. }
  203. goto next;
  204. }
  205. *out_len = op - out;
  206. return in_end - (ii - ti);
  207. }
  208. lzo1x_1_do_compress_zram(const unsigned char *in, size_t in_len,
  209. unsigned char *out, size_t *out_len,
  210. size_t ti, void *wrkmem, int *tmp_hash)
  211. {
  212. const unsigned char *ip;
  213. unsigned char *op;
  214. const unsigned char * const in_end = in + in_len;
  215. const unsigned char * const ip_end = in + in_len - 20;
  216. const unsigned char *ii;
  217. int t_total = 0, old_t = 0;
  218. lzo_dict_t * const dict = (lzo_dict_t *) wrkmem;
  219. op = out;
  220. ip = in;
  221. ii = ip;
  222. ip += ti < 4 ? 4 - ti : 0;
  223. for (;;) {
  224. const unsigned char *m_pos;
  225. size_t t, m_len, m_off;
  226. u32 dv;
  227. literal2:
  228. ip += 1 + ((ip - ii) >> 5);
  229. next2:
  230. if (unlikely(ip >= ip_end))
  231. break;
  232. dv = get_unaligned_le32(ip);
  233. t = ((dv * 0x1824429d) >> (32 - D_BITS));
  234. if (tmp_hash != NULL) {
  235. *tmp_hash += (int)(t - old_t);
  236. old_t = t;
  237. t_total += t;
  238. }
  239. t = t & D_MASK;
  240. m_pos = in + dict[t];
  241. dict[t] = (lzo_dict_t) (ip - in);
  242. if (unlikely(dv != get_unaligned_le32(m_pos)))
  243. goto literal2;
  244. ii -= ti;
  245. ti = 0;
  246. t = ip - ii;
  247. if (t != 0) {
  248. if (t <= 3) {
  249. op[-2] |= t;
  250. COPY4(op, ii);
  251. op += t;
  252. } else if (t <= 16) {
  253. *op++ = (t - 3);
  254. COPY8(op, ii);
  255. COPY8(op + 8, ii + 8);
  256. op += t;
  257. } else {
  258. if (t <= 18) {
  259. *op++ = (t - 3);
  260. } else {
  261. size_t tt = t - 18;
  262. *op++ = 0;
  263. while (unlikely(tt > 255)) {
  264. tt -= 255;
  265. *op++ = 0;
  266. }
  267. *op++ = tt;
  268. }
  269. do {
  270. COPY8(op, ii);
  271. COPY8(op + 8, ii + 8);
  272. op += 16;
  273. ii += 16;
  274. t -= 16;
  275. } while (t >= 16);
  276. if (t > 0)
  277. do {
  278. *op++ = *ii++;
  279. } while (--t > 0);
  280. }
  281. }
  282. m_len = 4;
  283. {
  284. #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
  285. u64 v;
  286. v = get_unaligned((const u64 *) (ip + m_len)) ^
  287. get_unaligned((const u64 *) (m_pos + m_len));
  288. if (unlikely(v == 0)) {
  289. do {
  290. m_len += 8;
  291. v = get_unaligned((const u64 *) (ip + m_len)) ^
  292. get_unaligned((const u64 *) (m_pos + m_len));
  293. if (unlikely(ip + m_len >= ip_end))
  294. goto m_len_done2;
  295. } while (v == 0);
  296. }
  297. # if defined(__LITTLE_ENDIAN)
  298. m_len += (unsigned) __builtin_ctzll(v) / 8;
  299. # elif defined(__BIG_ENDIAN)
  300. m_len += (unsigned) __builtin_clzll(v) / 8;
  301. # else
  302. # error "missing endian definition"
  303. # endif
  304. #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32)
  305. u32 v;
  306. v = get_unaligned((const u32 *) (ip + m_len)) ^
  307. get_unaligned((const u32 *) (m_pos + m_len));
  308. if (unlikely(v == 0)) {
  309. do {
  310. m_len += 4;
  311. v = get_unaligned((const u32 *) (ip + m_len)) ^
  312. get_unaligned((const u32 *) (m_pos + m_len));
  313. if (v != 0)
  314. break;
  315. m_len += 4;
  316. v = get_unaligned((const u32 *) (ip + m_len)) ^
  317. get_unaligned((const u32 *) (m_pos + m_len));
  318. if (unlikely(ip + m_len >= ip_end))
  319. goto m_len_done2;
  320. } while (v == 0);
  321. }
  322. # if defined(__LITTLE_ENDIAN)
  323. m_len += (unsigned) __builtin_ctz(v) / 8;
  324. # elif defined(__BIG_ENDIAN)
  325. m_len += (unsigned) __builtin_clz(v) / 8;
  326. # else
  327. # error "missing endian definition"
  328. # endif
  329. #else
  330. if (unlikely(ip[m_len] == m_pos[m_len])) {
  331. do {
  332. m_len += 1;
  333. if (ip[m_len] != m_pos[m_len])
  334. break;
  335. m_len += 1;
  336. if (ip[m_len] != m_pos[m_len])
  337. break;
  338. m_len += 1;
  339. if (ip[m_len] != m_pos[m_len])
  340. break;
  341. m_len += 1;
  342. if (ip[m_len] != m_pos[m_len])
  343. break;
  344. m_len += 1;
  345. if (ip[m_len] != m_pos[m_len])
  346. break;
  347. m_len += 1;
  348. if (ip[m_len] != m_pos[m_len])
  349. break;
  350. m_len += 1;
  351. if (ip[m_len] != m_pos[m_len])
  352. break;
  353. m_len += 1;
  354. if (unlikely(ip + m_len >= ip_end))
  355. goto m_len_done2;
  356. } while (ip[m_len] == m_pos[m_len]);
  357. }
  358. #endif
  359. }
  360. m_len_done2:
  361. m_off = ip - m_pos;
  362. ip += m_len;
  363. ii = ip;
  364. if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
  365. m_off -= 1;
  366. *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
  367. *op++ = (m_off >> 3);
  368. } else if (m_off <= M3_MAX_OFFSET) {
  369. m_off -= 1;
  370. if (m_len <= M3_MAX_LEN)
  371. *op++ = (M3_MARKER | (m_len - 2));
  372. else {
  373. m_len -= M3_MAX_LEN;
  374. *op++ = M3_MARKER | 0;
  375. while (unlikely(m_len > 255)) {
  376. m_len -= 255;
  377. *op++ = 0;
  378. }
  379. *op++ = (m_len);
  380. }
  381. *op++ = (m_off << 2);
  382. *op++ = (m_off >> 6);
  383. } else {
  384. m_off -= 0x4000;
  385. if (m_len <= M4_MAX_LEN)
  386. *op++ = (M4_MARKER | ((m_off >> 11) & 8)
  387. | (m_len - 2));
  388. else {
  389. m_len -= M4_MAX_LEN;
  390. *op++ = (M4_MARKER | ((m_off >> 11) & 8));
  391. while (unlikely(m_len > 255)) {
  392. m_len -= 255;
  393. *op++ = 0;
  394. }
  395. *op++ = (m_len);
  396. }
  397. *op++ = (m_off << 2);
  398. *op++ = (m_off >> 6);
  399. }
  400. goto next2;
  401. }
  402. *out_len = op - out;
  403. if (t_total > *tmp_hash)
  404. *tmp_hash = t_total;
  405. return in_end - (ii - ti);
  406. }
  407. int lzo1x_1_compress(const unsigned char *in, size_t in_len,
  408. unsigned char *out, size_t *out_len,
  409. void *wrkmem)
  410. {
  411. const unsigned char *ip = in;
  412. unsigned char *op = out;
  413. size_t l = in_len;
  414. size_t t = 0;
  415. while (l > 20) {
  416. size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
  417. uintptr_t ll_end = (uintptr_t) ip + ll;
  418. if ((ll_end + ((t + ll) >> 5)) <= ll_end)
  419. break;
  420. BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
  421. memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
  422. t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
  423. ip += ll;
  424. op += *out_len;
  425. l -= ll;
  426. }
  427. t += l;
  428. if (t > 0) {
  429. const unsigned char *ii = in + in_len - t;
  430. if (op == out && t <= 238) {
  431. *op++ = (17 + t);
  432. } else if (t <= 3) {
  433. op[-2] |= t;
  434. } else if (t <= 18) {
  435. *op++ = (t - 3);
  436. } else {
  437. size_t tt = t - 18;
  438. *op++ = 0;
  439. while (tt > 255) {
  440. tt -= 255;
  441. *op++ = 0;
  442. }
  443. *op++ = tt;
  444. }
  445. if (t >= 16) do {
  446. COPY8(op, ii);
  447. COPY8(op + 8, ii + 8);
  448. op += 16;
  449. ii += 16;
  450. t -= 16;
  451. } while (t >= 16);
  452. if (t > 0) do {
  453. *op++ = *ii++;
  454. } while (--t > 0);
  455. }
  456. *op++ = M4_MARKER | 1;
  457. *op++ = 0;
  458. *op++ = 0;
  459. *out_len = op - out;
  460. return LZO_E_OK;
  461. }
  462. EXPORT_SYMBOL_GPL(lzo1x_1_compress);
  463. int lzo1x_1_compress_zram(const unsigned char *in, size_t in_len,
  464. unsigned char *out, size_t *out_len,
  465. void *wrkmem, int *checksum)
  466. {
  467. const unsigned char *ip = in;
  468. unsigned char *op = out;
  469. unsigned int tmp_hash = 0;
  470. unsigned int old_hash = 0;
  471. unsigned int hash_total = 0;
  472. size_t l = in_len;
  473. size_t t = 0;
  474. unsigned int out_hash = 0;
  475. while (l > 20) {
  476. size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
  477. uintptr_t ll_end = (uintptr_t) ip + ll;
  478. if ((ll_end + ((t + ll) >> 5)) <= ll_end)
  479. break;
  480. BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
  481. memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
  482. t = lzo1x_1_do_compress_zram(ip, ll, op, out_len, t, wrkmem, &tmp_hash);
  483. if (checksum != NULL) {
  484. (*checksum) += (tmp_hash - old_hash);
  485. old_hash = tmp_hash;
  486. hash_total += tmp_hash;
  487. }
  488. if (*out_len >= 4) {
  489. unsigned int *tmp_op = op;
  490. out_hash = out_hash ^ *tmp_op;
  491. }
  492. ip += ll;
  493. op += *out_len;
  494. l -= ll;
  495. }
  496. t += l;
  497. if (t > 0) {
  498. const unsigned char *ii = in + in_len - t;
  499. if (op == out && t <= 238) {
  500. *op++ = (17 + t);
  501. } else if (t <= 3) {
  502. op[-2] |= t;
  503. } else if (t <= 18) {
  504. *op++ = (t - 3);
  505. } else {
  506. size_t tt = t - 18;
  507. *op++ = 0;
  508. while (tt > 255) {
  509. tt -= 255;
  510. *op++ = 0;
  511. }
  512. *op++ = tt;
  513. }
  514. if (t >= 16)
  515. do {
  516. COPY8(op, ii);
  517. COPY8(op + 8, ii + 8);
  518. op += 16;
  519. ii += 16;
  520. t -= 16;
  521. } while (t >= 16);
  522. if (t > 0)
  523. do {
  524. *op++ = *ii++;
  525. } while (--t > 0);
  526. }
  527. *op++ = M4_MARKER | 1;
  528. *op++ = 0;
  529. *op++ = 0;
  530. *out_len = op - out;
  531. if (hash_total > (unsigned int)*checksum)
  532. *checksum = hash_total;
  533. if (out_hash != 0)
  534. *checksum = out_hash^(unsigned int)*checksum;
  535. if (*out_len >= 4) {
  536. unsigned int *tmp_out = out;
  537. unsigned int tmp_checksum = 0;
  538. tmp_checksum = (unsigned int)*checksum+(unsigned int)*tmp_out;
  539. *checksum = (int)tmp_checksum;
  540. }
  541. return LZO_E_OK;
  542. }
  543. EXPORT_SYMBOL_GPL(lzo1x_1_compress_zram);
  544. MODULE_LICENSE("GPL");
  545. MODULE_DESCRIPTION("LZO1X-1 Compressor");