tuxonice_bio_core.c

/*
 * kernel/power/tuxonice_bio.c
 *
 * Copyright (C) 2004-2014 Nigel Cunningham (nigel at tuxonice net)
 *
 * Distributed under GPLv2.
 *
 * This file contains block io functions for TuxOnIce. These are
 * used by the swapwriter and it is planned that they will also
 * be used by the NFSwriter.
 *
 */

#include <linux/blkdev.h>
#include <linux/syscalls.h>
#include <linux/suspend.h>
#include <linux/ctype.h>
#include <linux/fs_uuid.h>

#include "tuxonice.h"
#include "tuxonice_sysfs.h"
#include "tuxonice_modules.h"
#include "tuxonice_prepare_image.h"
#include "tuxonice_bio.h"
#include "tuxonice_ui.h"
#include "tuxonice_alloc.h"
#include "tuxonice_io.h"
#include "tuxonice_builtin.h"
#include "tuxonice_bio_internal.h"

#define MEMORY_ONLY 1
#define THROTTLE_WAIT 2

/* #define MEASURE_MUTEX_CONTENTION */
#ifndef MEASURE_MUTEX_CONTENTION
#define my_mutex_lock(index, the_lock) mutex_lock(the_lock)
#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
#else
unsigned long mutex_times[2][2][NR_CPUS];
#define my_mutex_lock(index, the_lock) \
do { \
	int have_mutex; \
	have_mutex = mutex_trylock(the_lock); \
	if (!have_mutex) { \
		mutex_lock(the_lock); \
		mutex_times[index][0][smp_processor_id()]++; \
	} else { \
		mutex_times[index][1][smp_processor_id()]++; \
	} \
} while (0)
#define my_mutex_unlock(index, the_lock) mutex_unlock(the_lock)
#endif
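
/*
 * When MEASURE_MUTEX_CONTENTION is defined, mutex_times is indexed as
 * [mutex: 0 = read path, 1 = write path]
 * [outcome: 0 = trylock failed (contended), 1 = trylock succeeded (free)]
 * [cpu]. toi_bio_print_debug_stats() below reports these counters as its
 * "Contended"/"Free" columns.
 */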

static int page_idx, reset_idx;
static int target_outstanding_io = 1024;
static int max_outstanding_writes, max_outstanding_reads;
static struct page *bio_queue_head, *bio_queue_tail;
static atomic_t toi_bio_queue_size;
static DEFINE_SPINLOCK(bio_queue_lock);
static int free_mem_throttle, throughput_throttle;
int more_readahead = 1;
static struct page *readahead_list_head, *readahead_list_tail;
static struct page *waiting_on;
static atomic_t toi_io_in_progress, toi_io_done;
static DECLARE_WAIT_QUEUE_HEAD(num_in_progress_wait);
int current_stream;

/* Not static, so that the allocators can set up and complete
 * writing the header */
char *toi_writer_buffer;
int toi_writer_buffer_posn;

static DEFINE_MUTEX(toi_bio_mutex);
static DEFINE_MUTEX(toi_bio_readahead_mutex);
static struct task_struct *toi_queue_flusher;
static int toi_bio_queue_flush_pages(int dedicated_thread);

struct toi_module_ops toi_blockwriter_ops;

#define TOTAL_OUTSTANDING_IO (atomic_read(&toi_io_in_progress) + \
	atomic_read(&toi_bio_queue_size))

unsigned long raw_pages_allocd, header_pages_reserved;

/**
 * set_free_mem_throttle - set the point where we pause to avoid oom.
 *
 * Initially, this value is zero, but when we first fail to allocate memory,
 * we set it (plus a buffer) and thereafter throttle i/o once that limit is
 * reached.
 **/
static void set_free_mem_throttle(void)
{
#ifdef CONFIG_TOI_FIXUP
	int new_throttle = nr_unallocated_buffer_pages() + 256;
#else
	int new_throttle = nr_free_buffer_pages() + 256;
#endif

	if (new_throttle > free_mem_throttle)
		free_mem_throttle = new_throttle;
}

#define NUM_REASONS 7
static atomic_t reasons[NUM_REASONS];
static char *reason_name[NUM_REASONS] = {
	"readahead not ready",
	"bio allocation",
	"synchronous I/O",
	"toi_bio_get_new_page",
	"memory low",
	"readahead buffer allocation",
	"throughput_throttle",
};

/* User Specified Parameters. */
unsigned long resume_firstblock;
dev_t resume_dev_t;
struct block_device *resume_block_device;
static atomic_t resume_bdev_open_count;
struct block_device *header_block_device;

/**
 * toi_open_bdev: Open a bdev at resume time.
 *
 * index: The swap index. May be MAX_SWAPFILES for the resume_dev_t
 * (the user can have resume= pointing at a swap partition/file that isn't
 * swapon'd when they hibernate), or MAX_SWAPFILES+1 for the first page of
 * the header. The latter will be from a swap partition that was enabled
 * when we hibernated, but we don't know its real index until we read that
 * first page.
 * dev_t: The device major/minor.
 * display_errs: Whether to display errors (rather than trying to do this
 * quietly).
 *
 * We stored a dev_t in the image header. Open the matching device without
 * requiring /dev/<whatever> in most cases and record the details needed
 * to close it later and avoid duplicating work.
 */
struct block_device *toi_open_bdev(char *uuid, dev_t default_device, int display_errs)
{
	struct block_device *bdev;
	dev_t device = default_device;
	char buf[32];
	int retried = 0;

retry:
	if (uuid) {
		struct fs_info seek;

		strncpy((char *)&seek.uuid, uuid, 16);
		seek.dev_t = 0;
		seek.last_mount_size = 0;
		device = blk_lookup_fs_info(&seek);
		if (!device) {
			device = default_device;
			pr_debug("Unable to resolve uuid. Falling back to dev_t.\n");
		} else
			pr_debug("Resolved uuid to device %s.\n",
				 format_dev_t(buf, device));
	}

	if (!device) {
		pr_err("TuxOnIce attempting to open a blank dev_t!\n");
		dump_stack();
		return NULL;
	}

	bdev = toi_open_by_devnum(device);
	if (IS_ERR(bdev) || !bdev) {
		if (!retried) {
			retried = 1;
			wait_for_device_probe();
			goto retry;
		}
		if (display_errs)
			toi_early_boot_message(1, TOI_CONTINUE_REQ,
				"Failed accessing blk-dev \"%x\" (err %ld). Run mknod and/or lvmsetup in initrd/ramfs.",
				device, PTR_ERR(bdev));
		return ERR_PTR(-EINVAL);
	}

	toi_message(TOI_BIO, TOI_VERBOSE, 0, "TuxOnIce got bdev %p for dev_t %x.", bdev, device);

	return bdev;
}

static void toi_bio_reserve_header_space(unsigned long request)
{
	header_pages_reserved = request;
}

/**
 * do_bio_wait - wait for some TuxOnIce I/O to complete
 * @reason: The array index of the reason we're waiting.
 *
 * Wait for a particular page of I/O if we're after a particular page.
 * If we're not after a particular page, wait instead for all in flight
 * I/O to be completed or for us to have enough free memory to be able
 * to submit more I/O.
 *
 * If we wait, we also update our statistics regarding why we waited.
 **/
static void do_bio_wait(int reason)
{
	struct page *was_waiting_on = waiting_on;

	/* On SMP, waiting_on can be reset, so we make a copy */
	if (was_waiting_on) {
		wait_on_page_locked(was_waiting_on);
		atomic_inc(&reasons[reason]);
	} else {
		atomic_inc(&reasons[reason]);
#ifdef CONFIG_TOI_FIXUP
		wait_event(num_in_progress_wait,
			   !atomic_read(&toi_io_in_progress) ||
			   nr_unallocated_buffer_pages() > free_mem_throttle);
#else
		wait_event(num_in_progress_wait,
			   !atomic_read(&toi_io_in_progress) ||
			   nr_free_buffer_pages() > free_mem_throttle);
#endif
	}
}

/**
 * throttle_if_needed - wait for I/O completion if throttle points are reached
 * @flags: What to check and how to act.
 *
 * Check whether we need to wait for some I/O to complete. We always check
 * whether we have enough memory available, but may also (depending upon
 * @flags) check if the throughput throttle limit has been reached.
 **/
static int throttle_if_needed(int flags)
{
#ifdef CONFIG_TOI_FIXUP
	int free_pages = nr_unallocated_buffer_pages();
#else
	int free_pages = nr_free_buffer_pages();
#endif

	/* Getting low on memory and I/O is in progress? */
	while (unlikely(free_pages < free_mem_throttle) &&
	       atomic_read(&toi_io_in_progress) && !test_result_state(TOI_ABORTED)) {
		if (!(flags & THROTTLE_WAIT))
			return -ENOMEM;
		do_bio_wait(4);
#ifdef CONFIG_TOI_FIXUP
		free_pages = nr_unallocated_buffer_pages();
#else
		free_pages = nr_free_buffer_pages();
#endif
	}

	while (!(flags & MEMORY_ONLY) && throughput_throttle &&
	       TOTAL_OUTSTANDING_IO >= throughput_throttle && !test_result_state(TOI_ABORTED)) {
		int result = toi_bio_queue_flush_pages(0);

		if (result)
			return result;
		atomic_inc(&reasons[6]);
		wait_event(num_in_progress_wait,
			   !atomic_read(&toi_io_in_progress) ||
			   TOTAL_OUTSTANDING_IO < throughput_throttle);
	}

	return 0;
}

/**
 * update_throughput_throttle - update the raw throughput throttle
 * @jif_index: The number of times this function has been called.
 *
 * This function is called four times per second by the core, and used to limit
 * the amount of I/O we submit at once, spreading out our waiting through the
 * whole job and letting userui get an opportunity to do its work.
 *
 * We don't start limiting I/O until 1/4s has gone so that we get a
 * decent sample for our initial limit, and keep updating it because
 * throughput may vary (on rotating media, eg) with our block number.
 *
 * We throttle to 1/10s worth of I/O.
 **/
static void update_throughput_throttle(int jif_index)
{
	int done = atomic_read(&toi_io_done);

	throughput_throttle = done * 2 / 5 / jif_index;
}
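
/*
 * To see why done * 2 / 5 / jif_index is a tenth of a second's worth of
 * I/O: done / jif_index is the average number of pages completed per
 * quarter-second sample, and 2/5 of a quarter second is 1/10s.
 */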

/**
 * toi_finish_all_io - wait for all outstanding i/o to complete
 *
 * Flush any queued but unsubmitted I/O and wait for it all to complete.
 **/
static int toi_finish_all_io(void)
{
	int result = toi_bio_queue_flush_pages(0);

	toi_bio_queue_flusher_should_finish = 1;
	wake_up(&toi_io_queue_flusher);
	wait_event(num_in_progress_wait, !TOTAL_OUTSTANDING_IO);
	return result;
}

/**
 * toi_end_bio - bio completion function.
 * @bio: bio that has completed.
 * @err: Error value. Yes, like end_swap_bio_read, we ignore it.
 *
 * Function called by the block driver from interrupt context when I/O is
 * completed. If we were writing the page, we want to free it and will have
 * set bio->bi_private to the parameter we should use in telling the page
 * allocation accounting code what the page was allocated for. If we're
 * reading the page, it will be in the singly linked list made from
 * page->private pointers.
 **/
static void toi_end_bio(struct bio *bio, int err)
{
	struct page *page = bio->bi_io_vec[0].bv_page;

#ifdef CONFIG_TOI_FIXUP
	if (!err)
		BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
#else
	BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
#endif

	unlock_page(page);
	bio_put(bio);

	if (waiting_on == page)
		waiting_on = NULL;

	put_page(page);

	if (bio->bi_private)
		toi__free_page((int)((unsigned long)bio->bi_private), page);

	bio_put(bio);

	atomic_dec(&toi_io_in_progress);
	atomic_inc(&toi_io_done);
	wake_up(&num_in_progress_wait);
}

/**
 * submit - submit BIO request
 * @writing: READ or WRITE.
 * @dev: The block device we're using.
 * @first_block: The first sector we're using.
 * @page: The page being used for I/O.
 * @free_group: If writing, the group that was used in allocating the page
 *	and which will be used in freeing the page from the completion
 *	routine.
 *
 * Based on Patrick Mochell's pmdisk code from long ago: "Straight from the
 * textbook - allocate and initialize the bio. If we're writing, make sure
 * the page is marked as dirty. Then submit it and carry on."
 *
 * If we're just testing the speed of our own code, we fake having done all
 * the hard work and call toi_end_bio immediately.
 **/
static int submit(int writing, struct block_device *dev, sector_t first_block,
		  struct page *page, int free_group)
{
	struct bio *bio = NULL;
	int cur_outstanding_io, result;

	/*
	 * Shouldn't throttle if reading - can deadlock in the single
	 * threaded case as pages are only freed when we use the
	 * readahead.
	 */
	if (writing) {
		result = throttle_if_needed(MEMORY_ONLY | THROTTLE_WAIT);
		if (result)
			return result;
	}

	while (!bio) {
		bio = bio_alloc(TOI_ATOMIC_GFP, 1);
		if (!bio) {
			set_free_mem_throttle();
			do_bio_wait(1);
		}
	}

	bio->bi_bdev = dev;
	bio->bi_iter.bi_sector = first_block;
	bio->bi_private = (void *)((unsigned long)free_group);
	bio->bi_end_io = toi_end_bio;
	bio->bi_flags |= (1 << BIO_TOI);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		pr_debug("ERROR: adding page to bio at %lld\n",
			 (unsigned long long)first_block);
		bio_put(bio);
		return -EFAULT;
	}

	bio_get(bio);

	cur_outstanding_io = atomic_add_return(1, &toi_io_in_progress);
	if (writing) {
		if (cur_outstanding_io > max_outstanding_writes)
			max_outstanding_writes = cur_outstanding_io;
	} else {
		if (cur_outstanding_io > max_outstanding_reads)
			max_outstanding_reads = cur_outstanding_io;
	}

	/* Still read the header! */
	if (unlikely(test_action_state(TOI_TEST_BIO) && writing)) {
		/* Fake having done the hard work */
		set_bit(BIO_UPTODATE, &bio->bi_flags);
		toi_end_bio(bio, 0);
	} else
		submit_bio(writing | REQ_SYNC, bio);

	return 0;
}
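
/*
 * Note on bio reference counting: the bio_get() in submit() takes a second
 * reference on top of the one from bio_alloc(). toi_end_bio() drops both,
 * which is what keeps bio->bi_private safely readable after the first
 * bio_put() there has released the completion-side reference.
 */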

/**
 * toi_do_io: Prepare to do some i/o on a page and submit or batch it.
 *
 * @writing: Whether reading or writing.
 * @bdev: The block device which we're using.
 * @block0: The first sector we're reading or writing.
 * @page: The page on which I/O is being done.
 * @is_readahead: Whether we're doing readahead.
 * @syncio: Whether the i/o is being done synchronously.
 * @free_group: If writing, the group to use when freeing the page.
 *
 * Prepare and start a read or write operation.
 *
 * Note that we always work with our own page. If writing, we might be given a
 * compression buffer that will immediately be used to start compressing the
 * next page. For reading, we do readahead and therefore don't know the final
 * address where the data needs to go.
 **/
int toi_do_io(int writing, struct block_device *bdev, long block0,
	      struct page *page, int is_readahead, int syncio, int free_group)
{
	page->private = 0;

	/* Do here so we don't race against toi_bio_get_next_page_read */
	lock_page(page);

	if (is_readahead) {
		if (readahead_list_head)
			readahead_list_tail->private = (unsigned long)page;
		else
			readahead_list_head = page;
		readahead_list_tail = page;
	}

	/* Done before submitting to avoid races. */
	if (syncio)
		waiting_on = page;

	/* Submit the page */
	get_page(page);

	if (submit(writing, bdev, block0, page, free_group))
		return -EFAULT;

	if (syncio)
		do_bio_wait(2);

	return 0;
}
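
/*
 * Readahead pages form a singly linked FIFO threaded through page->private:
 *
 *   readahead_list_head -> page -> ... -> readahead_list_tail -> NULL
 *
 * Pages are appended above while still locked; toi_end_bio() unlocks them
 * on completion and toi_bio_get_next_page_read() consumes them from the
 * head.
 */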

/**
 * toi_bdev_page_io - simpler interface to do i/o directly on a single page
 * @writing: Whether reading or writing.
 * @bdev: Block device on which we're operating.
 * @pos: Sector at which page to read or write starts.
 * @page: Page to be read/written.
 *
 * A simple interface to submit a page of I/O and wait for its completion.
 * The caller must free the page used.
 **/
static int toi_bdev_page_io(int writing, struct block_device *bdev, long pos, struct page *page)
{
	return toi_do_io(writing, bdev, pos, page, 0, 1, 0);
}

/**
 * toi_bio_memory_needed - report the amount of memory needed for block i/o
 *
 * We want to have at least enough memory so as to have target_outstanding_io
 * or more transactions on the fly at once. If we can do more, fine.
 **/
static int toi_bio_memory_needed(void)
{
	return target_outstanding_io * (PAGE_SIZE + sizeof(struct request) + sizeof(struct bio));
}
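
/*
 * For example, with the default target_outstanding_io of 1024 and 4 KB
 * pages, this reserves 1024 * (4096 + sizeof(struct request) +
 * sizeof(struct bio)) bytes - a little over 4 MB, with the exact figure
 * depending on the kernel configuration.
 */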

/**
 * toi_bio_print_debug_stats - put out debugging info in the buffer provided
 * @buffer: A buffer of size @size into which text should be placed.
 * @size: The size of @buffer.
 *
 * Fill a buffer with debugging info. This is used for both our debug_info sysfs
 * entry and for recording the same info in dmesg.
 **/
static int toi_bio_print_debug_stats(char *buffer, int size)
{
	int len = 0;

	if (toiActiveAllocator != &toi_blockwriter_ops) {
		len = scnprintf(buffer, size, "- Block I/O inactive.\n");
		return len;
	}

	len = scnprintf(buffer, size, "- Block I/O active.\n");

	len += toi_bio_chains_debug_info(buffer + len, size - len);

	len += scnprintf(buffer + len, size - len,
			 "- Max outstanding reads %d. Max writes %d.\n",
			 max_outstanding_reads, max_outstanding_writes);

	len += scnprintf(buffer + len, size - len,
			 " Memory_needed: %d x (%lu + %u + %u) = %d bytes.\n",
			 target_outstanding_io,
			 PAGE_SIZE, (unsigned int)sizeof(struct request),
			 (unsigned int)sizeof(struct bio), toi_bio_memory_needed());

#ifdef MEASURE_MUTEX_CONTENTION
	{
		int i;

		len += scnprintf(buffer + len, size - len,
				 " Mutex contention while reading:\n Contended Free\n");

		for_each_online_cpu(i)
			len += scnprintf(buffer + len, size - len,
					 " %9lu %9lu\n", mutex_times[0][0][i], mutex_times[0][1][i]);

		len += scnprintf(buffer + len, size - len,
				 " Mutex contention while writing:\n Contended Free\n");

		for_each_online_cpu(i)
			len += scnprintf(buffer + len, size - len,
					 " %9lu %9lu\n", mutex_times[1][0][i], mutex_times[1][1][i]);
	}
#endif

	return len + scnprintf(buffer + len, size - len,
			       " Free mem throttle point reached %d.\n", free_mem_throttle);
}

static int total_header_bytes;
static int unowned;

void debug_broken_header(void)
{
	pr_debug("Image header too big for size allocated!\n");
	print_toi_header_storage_for_modules();
	pr_debug("Page flags : %d.\n", toi_pageflags_space_needed());
	pr_debug("toi_header : %zu.\n", sizeof(struct toi_header));
	pr_debug("Total unowned : %d.\n", unowned);
	pr_debug("Total used : %d (%ld pages).\n", total_header_bytes,
		 DIV_ROUND_UP(total_header_bytes, PAGE_SIZE));
	pr_debug("Space needed now : %ld.\n", get_header_storage_needed());
	dump_block_chains();
	abort_hibernate(TOI_HEADER_TOO_BIG, "Header reservation too small.");
}

/**
 * toi_rw_init - prepare to read or write a stream in the image
 * @writing: Whether reading or writing.
 * @stream_number: Section of the image being processed.
 *
 * Prepare to read or write a section ('stream') in the image.
 **/
static int toi_rw_init(int writing, int stream_number)
{
	if (stream_number)
		toi_extent_state_restore(stream_number);
	else
		toi_extent_state_goto_start();

	if (writing) {
		reset_idx = 0;
		if (!current_stream)
			page_idx = 0;
	} else {
		reset_idx = 1;
	}

	atomic_set(&toi_io_done, 0);
	if (!toi_writer_buffer)
		toi_writer_buffer = (char *)toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
	toi_writer_buffer_posn = writing ? 0 : PAGE_SIZE;

	current_stream = stream_number;

	more_readahead = 1;

	return toi_writer_buffer ? 0 : -ENOMEM;
}
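
/*
 * toi_writer_buffer_posn starts at 0 when writing (an empty page to fill),
 * but at PAGE_SIZE when reading: the buffer starts out "exhausted", so the
 * first toi_rw_buffer(READ, ...) call immediately pulls a fresh page in
 * via toi_bio_get_next_page_read().
 */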

/**
 * toi_bio_queue_write - queue a page for writing
 * @full_buffer: Pointer to a page to be queued
 *
 * Add a page to the queue to be submitted. If we're the queue flusher,
 * we'll do this once we've dropped toi_bio_mutex, so other threads can
 * continue to submit I/O while we're on the slow path doing the actual
 * submission.
 **/
static void toi_bio_queue_write(char **full_buffer)
{
	struct page *page = virt_to_page(*full_buffer);
	unsigned long flags;

	*full_buffer = NULL;
	page->private = 0;

	spin_lock_irqsave(&bio_queue_lock, flags);
	if (!bio_queue_head)
		bio_queue_head = page;
	else
		bio_queue_tail->private = (unsigned long)page;
	bio_queue_tail = page;
	atomic_inc(&toi_bio_queue_size);
	spin_unlock_irqrestore(&bio_queue_lock, flags);

	wake_up(&toi_io_queue_flusher);
}

/**
 * toi_rw_cleanup - Cleanup after i/o.
 * @writing: Whether we were reading or writing.
 *
 * Flush all I/O and clean everything up after reading or writing a
 * section of the image.
 **/
static int toi_rw_cleanup(int writing)
{
	int i, result = 0;

	toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_rw_cleanup.");

	if (writing) {
		if (toi_writer_buffer_posn && !test_result_state(TOI_ABORTED))
			toi_bio_queue_write(&toi_writer_buffer);

		while (bio_queue_head && !result)
			result = toi_bio_queue_flush_pages(0);

		if (result)
			return result;

		if (current_stream == 2)
			toi_extent_state_save(1);
		else if (current_stream == 1)
			toi_extent_state_save(3);
	}

	result = toi_finish_all_io();

	while (readahead_list_head) {
		void *next = (void *)readahead_list_head->private;

		toi__free_page(12, readahead_list_head);
		readahead_list_head = next;
	}

	readahead_list_tail = NULL;

	if (!current_stream)
		return result;

	for (i = 0; i < NUM_REASONS; i++) {
		if (!atomic_read(&reasons[i]))
			continue;
		pr_debug("Waited for i/o due to %s %d times.\n",
			 reason_name[i], atomic_read(&reasons[i]));
		atomic_set(&reasons[i], 0);
	}

	current_stream = 0;

	return result;
}

#ifdef CONFIG_TOI_FIXUP
static void check_mem_status(void)
{
	return;	/* suppress messages, unless we need it!! */

	hib_warn("%d: free/throttle: %lu/%d, no_readahead/target_outstanding_io:%d/%d io_in/io_done:%d/%d\n",
		 __LINE__, nr_unallocated_buffer_pages(), free_mem_throttle,
		 test_action_state(TOI_NO_READAHEAD), target_outstanding_io,
		 atomic_read(&toi_io_in_progress), atomic_read(&toi_io_done));
	HIB_SHOW_MEMINFO();
}

static int hit_lowmem_barrier(void)
{
	unsigned long free_pages;
	struct zone *zone;

	for_each_populated_zone(zone) {
		if (!strcmp(zone->name, "Normal")) {
			free_pages = zone_page_state(zone, NR_FREE_PAGES);
			return (free_pages < min_wmark_pages(zone)) ? 1 : 0;
		}
	}
	return 0;
}
#endif

/**
 * toi_start_one_readahead - start one page of readahead
 * @dedicated_thread: Is this a thread dedicated to doing readahead?
 *
 * Start one new page of readahead. If this is being called by a thread
 * whose only job is to submit readahead, don't quit because we failed
 * to allocate a page.
 **/
static int toi_start_one_readahead(int dedicated_thread)
{
	char *buffer = NULL;
	int oom = 0, result;

	result = throttle_if_needed(dedicated_thread ? THROTTLE_WAIT : 0);
#ifdef CONFIG_TOI_FIXUP
	if (result)
		return result;
#else
	if (result) {
		pr_warn("toi_start_one_readahead: throttle_if_needed returned %d.\n", result);
		return result;
	}
#endif

	mutex_lock(&toi_bio_readahead_mutex);

	while (!buffer) {
#ifdef CONFIG_TOI_FIXUP
		buffer = likely(hit_lowmem_barrier()) ? NULL :
			(char *)toi_get_zeroed_page(12, TOI_ATOMIC_GFP);
#else
		buffer = (char *)toi_get_zeroed_page(12, TOI_ATOMIC_GFP);
#endif
		if (!buffer) {
			if (oom && !dedicated_thread) {
				mutex_unlock(&toi_bio_readahead_mutex);
#ifndef CONFIG_TOI_FIXUP
				pr_warn("toi_start_one_readahead: oom and !dedicated thread %d.\n",
					result);
#endif
				return -ENOMEM;
			}

			oom = 1;
#ifdef CONFIG_TOI_FIXUP
			check_mem_status();
#endif
			set_free_mem_throttle();
			do_bio_wait(5);
		}
	}

	result = toi_bio_rw_page(READ, virt_to_page(buffer), 1, 0);
#ifndef CONFIG_TOI_FIXUP
	if (result)
		pr_warn("toi_start_one_readahead: toi_bio_rw_page returned %d.\n", result);
#endif
	if (result == -ENOSPC)
		toi__free_page(12, virt_to_page(buffer));
	mutex_unlock(&toi_bio_readahead_mutex);
	if (result) {
		if (result == -ENOSPC)
			toi_message(TOI_BIO, TOI_VERBOSE, 0, "Last readahead page submitted.");
		else
			pr_debug("toi_bio_rw_page returned %d.\n", result);
	}
	return result;
}

/**
 * toi_start_new_readahead - start new readahead
 * @dedicated_thread: Are we dedicated to this task?
 *
 * Start readahead of image pages.
 *
 * We can be called as a thread dedicated to this task (may be helpful on
 * systems with lots of CPUs), in which case we don't exit until there's no
 * more readahead.
 *
 * If this is not called by a dedicated thread, we top up our queue until
 * there's no more readahead to submit, we've submitted the number given
 * in target_outstanding_io or the number in progress exceeds the target
 * outstanding I/O value.
 *
 * No mutex needed because this is only ever called by the first cpu.
 **/
static int toi_start_new_readahead(int dedicated_thread)
{
	int last_result, num_submitted = 0;

	/* Start a new readahead? */
	if (!more_readahead)
		return 0;

	do {
		last_result = toi_start_one_readahead(dedicated_thread);

		if (last_result) {
			if (last_result == -ENOMEM || last_result == -ENOSPC)
				return 0;

			pr_debug("Begin read chunk returned %d.\n", last_result);
		} else
			num_submitted++;

	} while (more_readahead && !last_result &&
		 (dedicated_thread ||
		  (num_submitted < target_outstanding_io &&
		   atomic_read(&toi_io_in_progress) < target_outstanding_io)));

	return last_result;
}

/**
 * bio_io_flusher - start the dedicated I/O flushing routine
 * @writing: Whether we're writing the image.
 **/
static int bio_io_flusher(int writing)
{
	if (writing)
		return toi_bio_queue_flush_pages(1);
	else
		return toi_start_new_readahead(1);
}

/**
 * toi_bio_get_next_page_read - read a disk page, perhaps with readahead
 * @no_readahead: Whether we can use readahead
 *
 * Read a page from disk, submitting readahead and cleaning up finished i/o
 * while we wait for the page we're after.
 **/
static int toi_bio_get_next_page_read(int no_readahead)
{
	char *virt;
	struct page *old_readahead_list_head;

	/*
	 * When reading the second page of the header, we have to
	 * delay submitting the read until after we've gotten the
	 * extents out of the first page.
	 */
	if (unlikely(no_readahead)) {
		int result = toi_start_one_readahead(0);

		if (result) {
			pr_err("%d:No readahead and toi_start_one_readahead returned non-zero.\n", __LINE__);
			return -EIO;
		}
	}

	if (unlikely(!readahead_list_head)) {
		/*
		 * If the last page finishes exactly on the page
		 * boundary, we will be called one extra time and
		 * have no data to return. In this case, we should
		 * not BUG(), like we used to!
		 */
		if (!more_readahead) {
			pr_err("No more readahead.\n");
			return -ENOSPC;
		}
		if (unlikely(toi_start_one_readahead(0))) {
			pr_err("%d:No readahead and toi_start_one_readahead returned non-zero.\n", __LINE__);
			return -EIO;
		}
	}

	if (PageLocked(readahead_list_head)) {
		waiting_on = readahead_list_head;
		do_bio_wait(0);
	}

	virt = page_address(readahead_list_head);
	memcpy(toi_writer_buffer, virt, PAGE_SIZE);

	mutex_lock(&toi_bio_readahead_mutex);
	old_readahead_list_head = readahead_list_head;
	readahead_list_head = (struct page *)readahead_list_head->private;
	mutex_unlock(&toi_bio_readahead_mutex);
	toi__free_page(12, old_readahead_list_head);
	return 0;
}

/**
 * toi_bio_queue_flush_pages - flush the queue of pages queued for writing
 * @dedicated_thread: Whether we're a dedicated thread
 *
 * Flush the queue of pages ready to be written to disk.
 *
 * If we're a dedicated thread, stay in here until told to leave,
 * sleeping in wait_event.
 *
 * The first thread is normally the only one to come in here. Another
 * thread can enter this routine too, though, via throttle_if_needed.
 * Since that's the case, we must be careful to only have one thread
 * doing this work at a time. Otherwise we have a race and could save
 * pages out of order.
 *
 * If an error occurs, free all remaining pages without submitting them
 * for I/O.
 **/
int toi_bio_queue_flush_pages(int dedicated_thread)
{
	unsigned long flags;
	int result = 0;
	static DEFINE_MUTEX(busy);

	if (!mutex_trylock(&busy))
		return 0;

top:
	spin_lock_irqsave(&bio_queue_lock, flags);

	while (bio_queue_head) {
		struct page *page = bio_queue_head;

		bio_queue_head = (struct page *)page->private;
		if (bio_queue_tail == page)
			bio_queue_tail = NULL;
		atomic_dec(&toi_bio_queue_size);
		spin_unlock_irqrestore(&bio_queue_lock, flags);

		/* Don't generate more error messages if already had one */
		if (!result)
			result = toi_bio_rw_page(WRITE, page, 0, 11);
		/*
		 * If writing the page failed, don't drop out.
		 * Flush the rest of the queue too.
		 */
		if (result)
			toi__free_page(11, page);
		spin_lock_irqsave(&bio_queue_lock, flags);
	}

	spin_unlock_irqrestore(&bio_queue_lock, flags);

	if (dedicated_thread) {
		wait_event(toi_io_queue_flusher, bio_queue_head ||
			   toi_bio_queue_flusher_should_finish);
		if (likely(!toi_bio_queue_flusher_should_finish))
			goto top;
		toi_bio_queue_flusher_should_finish = 0;
	}

	mutex_unlock(&busy);
	return result;
}

/**
 * toi_bio_get_new_page - get a new page for I/O
 * @full_buffer: Pointer to a page to allocate.
 **/
static int toi_bio_get_new_page(char **full_buffer)
{
	int result = throttle_if_needed(THROTTLE_WAIT);

	if (result)
		return result;

	while (!*full_buffer) {
		*full_buffer = (char *)toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
		if (!*full_buffer) {
			set_free_mem_throttle();
			do_bio_wait(3);
		}
	}

	return 0;
}

/**
 * toi_rw_buffer - combine smaller buffers into PAGE_SIZE I/O
 * @writing: Bool - whether writing (or reading).
 * @buffer: The start of the buffer to write or fill.
 * @buffer_size: The size of the buffer to write or fill.
 * @no_readahead: Don't try to start readahead (when getting extents).
 **/
static int toi_rw_buffer(int writing, char *buffer, int buffer_size, int no_readahead)
{
	int bytes_left = buffer_size, result = 0;

	while (bytes_left) {
		char *source_start = buffer + buffer_size - bytes_left;
		char *dest_start = toi_writer_buffer + toi_writer_buffer_posn;
		int capacity = PAGE_SIZE - toi_writer_buffer_posn;
		char *to = writing ? dest_start : source_start;
		char *from = writing ? source_start : dest_start;

		if (bytes_left <= capacity) {
			memcpy(to, from, bytes_left);
			toi_writer_buffer_posn += bytes_left;
			return 0;
		}

		/* Complete this page and start a new one */
		memcpy(to, from, capacity);
		bytes_left -= capacity;

		if (!writing) {
			/*
			 * Perform actual I/O:
			 * read readahead_list_head into toi_writer_buffer
			 */
			result = toi_bio_get_next_page_read(no_readahead);
			if (result) {
				pr_warn("toi_bio_get_next_page_read returned %d.\n", result);
				return result;
			}
		} else {
			toi_bio_queue_write(&toi_writer_buffer);
			result = toi_bio_get_new_page(&toi_writer_buffer);
			if (result) {
				pr_err("toi_bio_get_new_page returned %d.\n", result);
				return result;
			}
		}

		toi_writer_buffer_posn = 0;
		toi_cond_pause(0, NULL);
	}

	return 0;
}
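
/*
 * Example of the packing this gives us: writing an int followed by an
 * unsigned long with two toi_rw_buffer(WRITE, ...) calls places both
 * values back to back in toi_writer_buffer; only once the page fills
 * (capacity runs out) is it queued for disk and a fresh page started,
 * so records can straddle page boundaries.
 */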

/**
 * toi_bio_read_page - read a page of the image
 * @pfn: The pfn where the data belongs.
 * @buffer_page: The page containing the (possibly compressed) data.
 * @buf_size: The number of bytes on @buffer_page used (PAGE_SIZE).
 *
 * Read a (possibly compressed) page from the image, into buffer_page,
 * returning its pfn and the buffer size.
 **/
static int toi_bio_read_page(unsigned long *pfn, int buf_type,
			     void *buffer_page, unsigned int *buf_size)
{
	int result = 0;
	int this_idx;
	char *buffer_virt = TOI_MAP(buf_type, buffer_page);

	/*
	 * Only call start_new_readahead if we don't have a dedicated thread
	 * and we're the queue flusher.
	 */
	if (current == toi_queue_flusher && more_readahead && !test_action_state(TOI_NO_READAHEAD)) {
		int result2 = toi_start_new_readahead(0);

		if (result2) {
			pr_debug("Queue flusher and toi_start_new_readahead returned non-zero.\n");
			result = -EIO;
			goto out;
		}
	}

	my_mutex_lock(0, &toi_bio_mutex);

	/*
	 * Structure in the image:
	 * [page index|destination pfn|page size|page data]
	 * buf_size is PAGE_SIZE
	 * We can validly find there's nothing to read in a multithreaded
	 * situation.
	 */
	if (toi_rw_buffer(READ, (char *)&this_idx, sizeof(int), 0) ||
	    toi_rw_buffer(READ, (char *)pfn, sizeof(unsigned long), 0) ||
	    toi_rw_buffer(READ, (char *)buf_size, sizeof(int), 0) ||
	    toi_rw_buffer(READ, buffer_virt, *buf_size, 0)) {
		result = -ENODATA;
		goto out_unlock;
	}

	if (reset_idx) {
		page_idx = this_idx;
		reset_idx = 0;
	} else {
		page_idx++;
		if (!this_idx)
			result = -ENODATA;
		else if (page_idx != this_idx)
			pr_err("Got page index %d, expected %d.\n", this_idx, page_idx);
	}

out_unlock:
	my_mutex_unlock(0, &toi_bio_mutex);
out:
	TOI_UNMAP(buf_type, buffer_page);
	return result;
}

/**
 * toi_bio_write_page - write a page of the image
 * @pfn: The pfn where the data belongs.
 * @buffer_page: The page containing the (possibly compressed) data.
 * @buf_size: The number of bytes on @buffer_page used.
 *
 * Write a (possibly compressed) page to the image from the buffer, together
 * with its index and buffer size.
 **/
static int toi_bio_write_page(unsigned long pfn, int buf_type,
			      void *buffer_page, unsigned int buf_size)
{
	char *buffer_virt;
	int result = 0, result2 = 0;

	if (unlikely(test_action_state(TOI_TEST_FILTER_SPEED)))
		return 0;

	my_mutex_lock(1, &toi_bio_mutex);

	if (test_result_state(TOI_ABORTED)) {
		my_mutex_unlock(1, &toi_bio_mutex);
		return 0;
	}

	buffer_virt = TOI_MAP(buf_type, buffer_page);
	page_idx++;

	/*
	 * Structure in the image:
	 * [page index|destination pfn|page size|page data]
	 * buf_size is PAGE_SIZE
	 */
	if (toi_rw_buffer(WRITE, (char *)&page_idx, sizeof(int), 0) ||
	    toi_rw_buffer(WRITE, (char *)&pfn, sizeof(unsigned long), 0) ||
	    toi_rw_buffer(WRITE, (char *)&buf_size, sizeof(int), 0) ||
	    toi_rw_buffer(WRITE, buffer_virt, buf_size, 0)) {
		pr_debug("toi_rw_buffer returned non-zero to toi_bio_write_page.\n");
		result = -EIO;
	}

	TOI_UNMAP(buf_type, buffer_page);
	my_mutex_unlock(1, &toi_bio_mutex);

	if (current == toi_queue_flusher)
		result2 = toi_bio_queue_flush_pages(0);

	return result ? result : result2;
}
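
/*
 * So, for instance, a page whose data compressed to 962 bytes (an
 * illustrative figure) is stored as sizeof(int) bytes of sequence index,
 * sizeof(unsigned long) bytes of pfn, sizeof(int) bytes holding 962, and
 * then the 962 data bytes, with the field widths being whatever the
 * running kernel's type sizes make them.
 */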

/**
 * _toi_rw_header_chunk - read or write a portion of the image header
 * @writing: Whether reading or writing.
 * @owner: The module for which we're writing. Used for confirming that
 *	modules don't use more header space than they asked for.
 * @buffer: Address of the data to write.
 * @buffer_size: Size of the data buffer.
 * @no_readahead: Don't try to start readahead (when getting extents).
 *
 * Perform PAGE_SIZE I/O. Start readahead if needed.
 **/
static int _toi_rw_header_chunk(int writing, struct toi_module_ops *owner,
				char *buffer, int buffer_size, int no_readahead)
{
	int result = 0;

	if (owner) {
		owner->header_used += buffer_size;
		toi_message(TOI_HEADER, TOI_LOW, 1,
			    "Header: %s : %d bytes (%d/%d) from offset %d.",
			    owner->name,
			    buffer_size, owner->header_used,
			    owner->header_requested, toi_writer_buffer_posn);
		if (owner->header_used > owner->header_requested && writing) {
			pr_err("TuxOnIce module %s is using more header space (%u) than it requested (%u)",
			       owner->name, owner->header_used, owner->header_requested);
			return buffer_size;
		}
	} else {
		unowned += buffer_size;
		toi_message(TOI_HEADER, TOI_LOW, 1,
			    "Header: (No owner): %d bytes (%d total so far) from offset %d.",
			    buffer_size, unowned, toi_writer_buffer_posn);
	}

	if (!writing && !no_readahead && more_readahead) {
		result = toi_start_new_readahead(0);
		toi_message(TOI_BIO, TOI_VERBOSE, 0, "Start new readahead returned %d.", result);
	}

	if (!result) {
		result = toi_rw_buffer(writing, buffer, buffer_size, no_readahead);
		toi_message(TOI_BIO, TOI_VERBOSE, 0, "rw_buffer returned %d.", result);
	}

	total_header_bytes += buffer_size;
	toi_message(TOI_BIO, TOI_VERBOSE, 0, "_toi_rw_header_chunk returning %d.", result);
	return result;
}

static int toi_rw_header_chunk(int writing, struct toi_module_ops *owner, char *buffer, int size)
{
	return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
}

static int toi_rw_header_chunk_noreadahead(int writing,
					   struct toi_module_ops *owner, char *buffer, int size)
{
	return _toi_rw_header_chunk(writing, owner, buffer, size, 1);
}

/**
 * toi_bio_storage_needed - get the amount of storage needed for my fns
 **/
static int toi_bio_storage_needed(void)
{
	return sizeof(int) + PAGE_SIZE + toi_bio_devinfo_storage_needed();
}

/**
 * toi_bio_save_config_info - save block I/O config to image header
 * @buf: PAGE_SIZE'd buffer into which data should be saved.
 **/
static int toi_bio_save_config_info(char *buf)
{
	int *ints = (int *)buf;

	ints[0] = target_outstanding_io;
	return sizeof(int);
}

/**
 * toi_bio_load_config_info - restore block I/O config
 * @buf: Data to be reloaded.
 * @size: Size of the buffer saved.
 **/
static void toi_bio_load_config_info(char *buf, int size)
{
	int *ints = (int *)buf;

	target_outstanding_io = ints[0];
}

void close_resume_dev_t(int force)
{
	if (!resume_block_device)
		return;

	if (force)
		atomic_set(&resume_bdev_open_count, 0);
	else
		atomic_dec(&resume_bdev_open_count);

	if (!atomic_read(&resume_bdev_open_count)) {
		toi_close_bdev(resume_block_device);
		resume_block_device = NULL;
	}
}

int open_resume_dev_t(int force, int quiet)
{
	if (force) {
		close_resume_dev_t(1);
		atomic_set(&resume_bdev_open_count, 1);
	} else
		atomic_inc(&resume_bdev_open_count);

	if (resume_block_device)
		return 0;

	resume_block_device = toi_open_bdev(NULL, resume_dev_t, 0);
	if (IS_ERR(resume_block_device)) {
		if (!quiet)
			toi_early_boot_message(1, TOI_CONTINUE_REQ,
				"Failed to open device %x, where the header should be found.",
				resume_dev_t);
		resume_block_device = NULL;
		atomic_set(&resume_bdev_open_count, 0);
		return 1;
	}

	return 0;
}

/**
 * toi_bio_initialise - initialise bio code at start of some action
 * @starting_cycle: Whether starting a hibernation cycle, or just reading or
 *	writing a sysfs value.
 **/
static int toi_bio_initialise(int starting_cycle)
{
	int result;

	if (!starting_cycle || !resume_dev_t)
		return 0;

	max_outstanding_writes = 0;
	max_outstanding_reads = 0;
	current_stream = 0;
	toi_queue_flusher = current;
#ifdef MEASURE_MUTEX_CONTENTION
	{
		int i, j, k;

		for (i = 0; i < 2; i++)
			for (j = 0; j < 2; j++)
				for_each_online_cpu(k)
					mutex_times[i][j][k] = 0;
	}
#endif
	result = open_resume_dev_t(0, 1);

	if (result)
		return result;

	return get_signature_page();
}

static unsigned long raw_to_real(unsigned long raw)
{
	unsigned long extra;

	extra = (raw * (sizeof(unsigned long) + sizeof(int)) +
		 (PAGE_SIZE + sizeof(unsigned long) + sizeof(int) + 1)) /
		(PAGE_SIZE + sizeof(unsigned long) + sizeof(int));

	return raw > extra ? raw - extra : 0;
}
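
/*
 * raw_to_real() converts a raw page count into usable payload pages: the
 * formula assumes each PAGE_SIZE of payload also costs sizeof(unsigned
 * long) + sizeof(int) bytes of per-page metadata, so "extra" is the
 * number of raw pages (rounded up) that the metadata will consume.
 */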

static unsigned long toi_bio_storage_available(void)
{
	unsigned long sum = 0;
	struct toi_module_ops *this_module;

	list_for_each_entry(this_module, &toi_modules, module_list) {
		if (!this_module->enabled || this_module->type != BIO_ALLOCATOR_MODULE)
			continue;
		toi_message(TOI_BIO, TOI_VERBOSE, 0, "Seeking storage available from %s.",
			    this_module->name);
		sum += this_module->bio_allocator_ops->storage_available();
	}

	toi_message(TOI_BIO, TOI_VERBOSE, 0, "Total storage available is %lu pages (%lu header pages).",
		    sum, header_pages_reserved);

	return sum > header_pages_reserved ? raw_to_real(sum - header_pages_reserved) : 0;
}

static unsigned long toi_bio_storage_allocated(void)
{
	return raw_pages_allocd > header_pages_reserved ?
		raw_to_real(raw_pages_allocd - header_pages_reserved) : 0;
}

/*
 * If we have read part of the image, we might have filled memory with
 * data that should be zeroed out.
 */
static void toi_bio_noresume_reset(void)
{
	toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_noresume_reset.");
	toi_rw_cleanup(READ);
	free_all_bdev_info();
}

/**
 * toi_bio_cleanup - cleanup after some action
 * @finishing_cycle: Whether completing a cycle.
 **/
static void toi_bio_cleanup(int finishing_cycle)
{
	if (!finishing_cycle)
		return;

	if (toi_writer_buffer) {
		toi_free_page(11, (unsigned long)toi_writer_buffer);
		toi_writer_buffer = NULL;
	}

	forget_signature_page();

	if (header_block_device && toi_sig_data && toi_sig_data->header_dev_t != resume_dev_t)
		toi_close_bdev(header_block_device);

	header_block_device = NULL;

#ifdef CONFIG_TOI_FIXUP
	close_resume_dev_t(1);
#else
	close_resume_dev_t(0);
#endif
}

static int toi_bio_write_header_init(void)
{
	int result;

	toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_write_header_init");
	toi_rw_init(WRITE, 0);
	toi_writer_buffer_posn = 0;

	/* Info needed to bootstrap goes at the start of the header.
	 * First we save the positions and devinfo, including the number
	 * of header pages. Then we save the structs containing data needed
	 * for reading the header pages back.
	 * Note that even if header pages take more than one page, when we
	 * read back the info, we will have restored the location of the
	 * next header page by the time we go to use it.
	 */
	toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise extent chains.");
	result = toi_serialise_extent_chains();

	if (result)
		return result;

	/*
	 * Signature page hasn't been modified at this point. Write it in
	 * the header so we can restore it later.
	 */
	toi_message(TOI_BIO, TOI_VERBOSE, 0, "serialise signature page.");
	return toi_rw_header_chunk_noreadahead(WRITE, &toi_blockwriter_ops,
					       (char *)toi_cur_sig_page, PAGE_SIZE);
}

static int toi_bio_write_header_cleanup(void)
{
	int result = 0;

	if (toi_writer_buffer_posn)
		toi_bio_queue_write(&toi_writer_buffer);

	result = toi_finish_all_io();

	unowned = 0;
	total_header_bytes = 0;

	/* Set signature to say we have an image */
	if (!result)
		result = toi_bio_mark_have_image();

#ifdef CONFIG_MTK_MTD_NAND
	/* FIXME: mtdblock doesn't sync without this */
	blkdev_ioctl(resume_block_device, 0, BLKFLSBUF, 0);
#endif

	return result;
}

/*
 * toi_bio_read_header_init()
 *
 * Description:
 * 1. Attempt to read the device specified with resume=.
 * 2. Check the contents of the swap header for our signature.
 * 3. Warn, ignore, reset and/or continue as appropriate.
 * 4. If continuing, read the toi_swap configuration section
 *    of the header and set up block device info so we can read
 *    the rest of the header & image.
 *
 * Returns:
 * May not return if the user chooses to reboot at a warning.
 * -EINVAL if cannot resume at this time. Booting should continue
 * normally.
 */
static int toi_bio_read_header_init(void)
{
	int result = 0;
#ifdef CONFIG_TOI_FIXUP
	char buf[32];
#endif

	toi_writer_buffer_posn = 0;

	toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_init");

	if (!toi_sig_data) {
		pr_warn("toi_bio_read_header_init called when we haven't verified there is an image!");
		return -EINVAL;
	}

	/*
	 * If the header is not on the resume_swap_dev_t, get the resume device
	 * first.
	 */
	toi_message(TOI_BIO, TOI_VERBOSE, 0, "Header dev_t is %x.",
		    (unsigned int)toi_sig_data->header_dev_t);
#ifdef CONFIG_TOI_FIXUP
	if (toi_sig_data->have_uuid) {
		struct fs_info seek;
		dev_t device;

		strncpy((char *)seek.uuid, toi_sig_data->header_uuid, 16);
		seek.dev_t = toi_sig_data->header_dev_t;
		seek.last_mount_size = 0;
		device = blk_lookup_fs_info(&seek);
		if (device) {
			pr_warn("Using dev_t %s, returned by blk_lookup_fs_info.\n",
				format_dev_t(buf, device));
			toi_sig_data->header_dev_t = device;
		}
	}
#endif
	if (toi_sig_data->header_dev_t != resume_dev_t) {
		header_block_device = toi_open_bdev(NULL, toi_sig_data->header_dev_t, 1);

		if (IS_ERR(header_block_device))
			return PTR_ERR(header_block_device);
	} else
		header_block_device = resume_block_device;

	if (!toi_writer_buffer)
		toi_writer_buffer = (char *)toi_get_zeroed_page(11, TOI_ATOMIC_GFP);
	more_readahead = 1;

	/*
	 * Read toi_swap configuration.
	 * Headerblock size taken into account already.
	 */
	result = toi_bio_ops.bdev_page_io(READ, header_block_device,
					  toi_sig_data->first_header_block,
					  virt_to_page((unsigned long)toi_writer_buffer));
	if (result)
		return result;

	toi_message(TOI_BIO, TOI_VERBOSE, 0, "load extent chains.");
	result = toi_load_extent_chains();

	toi_message(TOI_BIO, TOI_VERBOSE, 0, "load original signature page.");
	toi_orig_sig_page = (char *)toi_get_zeroed_page(38, TOI_ATOMIC_GFP);
	if (!toi_orig_sig_page) {
		pr_err("Failed to allocate memory for the current image signature.\n");
		return -ENOMEM;
	}

	return toi_rw_header_chunk_noreadahead(READ, &toi_blockwriter_ops,
					       (char *)toi_orig_sig_page, PAGE_SIZE);
}

static int toi_bio_read_header_cleanup(void)
{
	toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_read_header_cleanup.");
	return toi_rw_cleanup(READ);
}
/* Works only for digits and letters, but small and fast */
#define TOLOWER(x) ((x) | 0x20)
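/*
 * e.g. TOLOWER('C') == 'c'; digits ('0'..'9' are 0x30..0x39) already have
 * bit 0x20 set, so they map to themselves.
 */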
/*
 * A UUID must be 32 hex digits long. It may contain dashes, but nothing
 * else.
 */
char *uuid_from_commandline(char *commandline)
{
	int low = 0;
	char *result = NULL, *output, *ptr;

	if (strncmp(commandline, "UUID=", 5))
		return NULL;

	result = kzalloc(17, GFP_KERNEL);
	if (!result)
		return NULL;

	ptr = commandline + 5;
	output = result;

	while (*ptr && (output - result) < 16) {
		if (isxdigit(*ptr)) {
			int value = isdigit(*ptr) ? *ptr - '0' :
				TOLOWER(*ptr) - 'a' + 10;
			if (low) {
				/* Low nibble: complete the current byte. */
				*output += value;
				output++;
			} else {
				/* High nibble: start a new byte. */
				*output = value << 4;
			}
			low = !low;
		} else if (*ptr != '-')
			break;
		ptr++;
	}

	if ((output - result) < 16 || *ptr) {
		pr_debug("Found resume=UUID=, but the value looks invalid.\n");
		kfree(result);
		result = NULL;
	}

	return result;
}
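/*
 * Example (hypothetical value): "UUID=de305d54-75b4-431b-adb2-eb6b9e546014"
 * parses to the 16-byte binary UUID de 30 5d 54 75 b4 43 1b ad b2 eb 6b
 * 9e 54 60 14 -- dashes are skipped and each pair of hex digits is packed
 * into one output byte, high nibble first.
 */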
/*
 * Run @command; if it didn't yield a resume_dev_t, wait for device probing
 * to finish and retry once.
 */
#define retry_if_fails(command) \
	do { \
		command; \
		if (!resume_dev_t && !waited_for_device_probe) { \
			wait_for_device_probe(); \
			command; \
			waited_for_device_probe = 1; \
		} \
	} while (0)
/**
 * try_to_open_resume_device: Try to parse and open resume=
 *
 * Any "swap:" has been stripped away and we just have the path to deal with.
 * We attempt to do name_to_dev_t, open and stat the file. Having opened the
 * file, get the struct block_device * to match.
 */
static int try_to_open_resume_device(char *commandline, int quiet)
{
	struct kstat stat;
	int error = 0;
	char *uuid = uuid_from_commandline(commandline);
	int waited_for_device_probe = 0;

	hib_log("commandline=\"%s\"\n", commandline);
	resume_dev_t = MKDEV(0, 0);

	if (!strlen(commandline)) {
#ifdef CONFIG_TOI_FIXUP
		hib_warn("skip scanning for image...\n");
#else
		retry_if_fails(toi_bio_scan_for_image(quiet));
#endif
	}

	if (uuid) {
		struct fs_info seek;

		strncpy((char *)&seek.uuid, uuid, 16);
		seek.dev_t = resume_dev_t;
		seek.last_mount_size = 0;
		retry_if_fails(resume_dev_t = blk_lookup_fs_info(&seek));
		kfree(uuid);
	}

	if (!resume_dev_t)
		retry_if_fails(resume_dev_t = name_to_dev_t(commandline));

	if (!resume_dev_t) {
		struct file *file = filp_open(commandline,
				O_RDONLY | O_LARGEFILE, 0);

		if (!IS_ERR(file) && file) {
			vfs_getattr(&file->f_path, &stat);
			filp_close(file, NULL);
		} else
			error = vfs_stat(commandline, &stat);
		if (!error)
			resume_dev_t = stat.rdev;
	}

	if (!resume_dev_t) {
		if (quiet)
			return 1;

		if (test_toi_state(TOI_TRYING_TO_RESUME))
			toi_early_boot_message(1, toi_translate_err_default,
				"Failed to translate \"%s\" into a device id.\n",
				commandline);
#ifdef CONFIG_TOI_FIXUP
		else if (strlen(commandline))
#else
		else
#endif
			pr_warn("TuxOnIce: Can't translate \"%s\" into a device id yet.\n",
				commandline);
		return 1;
	}

	return open_resume_dev_t(1, quiet);
}
/*
 * Parse Image Location
 *
 * Attempt to parse a resume= parameter.
 * Swap Writer accepts:
 * resume=[swap:|file:]DEVNAME[:FIRSTBLOCK][@BLOCKSIZE]
 *
 * Where:
 * DEVNAME is convertible to a dev_t by name_to_dev_t
 * FIRSTBLOCK is the location of the first block in the swap file
 * (specifying it for a swap partition is nonsensical but not prohibited).
 * Data is validated by attempting to read a swap header from the
 * location given. Failure will result in toi_swap refusing to
 * save an image, and a reboot with correct parameters will be
 * necessary.
 */
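/*
 * For illustration only (device names and values are hypothetical):
 *   resume=swap:/dev/sda2     - swap partition named explicitly
 *   resume=/dev/sda2:0x2000   - bare device with an explicit first block
 *   resume=UUID=de305d54-75b4-431b-adb2-eb6b9e546014
 *                             - device looked up by filesystem UUID
 */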
static int toi_bio_parse_sig_location(char *commandline, int only_allocator,
		int quiet)
{
	char *thischar, *devstart, *colon = NULL;
	int signature_found, result = -EINVAL, temp_result = 0;

	if (strncmp(commandline, "swap:", 5) &&
	    strncmp(commandline, "file:", 5)) {
		/*
		 * Failing swap:, we'll take a simple resume=/dev/hda2, or a
		 * blank value (scan) but fall through to other allocators
		 * if /dev/ or UUID= isn't matched.
		 */
		if (strncmp(commandline, "/dev/", 5) &&
		    strncmp(commandline, "UUID=", 5) && strlen(commandline))
			return 1;
	} else
		commandline += 5;

	devstart = commandline;
	thischar = commandline;

	/* Find the optional ":FIRSTBLOCK" suffix, scanning at most 250
	 * characters of the device name. */
	while ((*thischar != ':') && (*thischar != '@') &&
	       ((thischar - commandline) < 250) && (*thischar))
		thischar++;

	if (*thischar == ':') {
		colon = thischar;
		*colon = 0;
		thischar++;
	}

	while ((thischar - commandline) < 250 && *thischar)
		thischar++;

	if (colon) {
		unsigned long block;

		temp_result = kstrtoul(colon + 1, 0, &block);
		if (!temp_result)
			resume_firstblock = (int)block;
#ifdef CONFIG_TOI_FIXUP
	} else if (swsusp_resume_block) {
		resume_firstblock = swsusp_resume_block;
#endif
	} else
		resume_firstblock = 0;

	clear_toi_state(TOI_CAN_HIBERNATE);
	clear_toi_state(TOI_CAN_RESUME);

	if (!temp_result)
		temp_result = try_to_open_resume_device(devstart, quiet);

	if (colon)
		*colon = ':';

	/* No error if we only scanned */
	if (temp_result)
		return strlen(commandline) ? -EINVAL : 1;

	signature_found = toi_bio_image_exists(quiet);

	if (signature_found != -1) {
		result = 0;
		/*
		 * TODO: If only file storage, CAN_HIBERNATE should only be
		 * set if the file allocator's target is valid.
		 */
		set_toi_state(TOI_CAN_HIBERNATE);
		set_toi_state(TOI_CAN_RESUME);
	} else if (!quiet)
		pr_err("TuxOnIce: Block I/O: No signature found at %s.\n",
		       devstart);

	return result;
}
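/*
 * toi_bio_release_storage - reset our page reservations and free all block
 * device extent information so the storage can be reused.
 */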
static void toi_bio_release_storage(void)
{
	header_pages_reserved = 0;
	raw_pages_allocd = 0;

	free_all_bdev_info();
}
/*
 * toi_bio_remove_image
 *
 * Restore the original (pre-hibernate) signature and release our storage.
 */
static int toi_bio_remove_image(void)
{
	int result;

	toi_message(TOI_BIO, TOI_VERBOSE, 0, "toi_bio_remove_image.");

	result = toi_bio_restore_original_signature();

	/*
	 * We don't do a sanity check here: we want to restore the swap
	 * whichever version of the kernel made the hibernate image.
	 *
	 * We need to write swap, but swap may not be enabled so
	 * we write the device directly.
	 *
	 * If we don't have a current_signature_page, we didn't
	 * read an image header, so don't change anything.
	 */

	toi_bio_release_storage();

	return result;
}
struct toi_bio_ops toi_bio_ops = {
	.bdev_page_io = toi_bdev_page_io,
	.register_storage = toi_register_storage_chain,
	.free_storage = toi_bio_release_storage,
};
EXPORT_SYMBOL_GPL(toi_bio_ops);
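/*
 * Single sysfs tunable: target_outstanding_io, the number of in-flight
 * bios we aim to keep queued (registered below as a read/write integer,
 * presumably clamped to the 0..16384 range given to SYSFS_INT).
 */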
static struct toi_sysfs_data sysfs_params[] = {
	SYSFS_INT("target_outstanding_io", SYSFS_RW, &target_outstanding_io,
		  0, 16384, 0, NULL),
};
struct toi_module_ops toi_blockwriter_ops = {
	.type = WRITER_MODULE,
	.name = "block i/o",
	.directory = "block_io",
	.module = THIS_MODULE,
	.memory_needed = toi_bio_memory_needed,
	.print_debug_info = toi_bio_print_debug_stats,
	.storage_needed = toi_bio_storage_needed,
	.save_config_info = toi_bio_save_config_info,
	.load_config_info = toi_bio_load_config_info,
	.initialise = toi_bio_initialise,
	.cleanup = toi_bio_cleanup,
	.post_atomic_restore = toi_bio_chains_post_atomic,

	.rw_init = toi_rw_init,
	.rw_cleanup = toi_rw_cleanup,
	.read_page = toi_bio_read_page,
	.write_page = toi_bio_write_page,
	.rw_header_chunk = toi_rw_header_chunk,
	.rw_header_chunk_noreadahead = toi_rw_header_chunk_noreadahead,
	.io_flusher = bio_io_flusher,
	.update_throughput_throttle = update_throughput_throttle,
	.finish_all_io = toi_finish_all_io,

	.noresume_reset = toi_bio_noresume_reset,
	.storage_available = toi_bio_storage_available,
	.storage_allocated = toi_bio_storage_allocated,
	.reserve_header_space = toi_bio_reserve_header_space,
	.allocate_storage = toi_bio_allocate_storage,
	.image_exists = toi_bio_image_exists,
	.mark_resume_attempted = toi_bio_mark_resume_attempted,
	.write_header_init = toi_bio_write_header_init,
	.write_header_cleanup = toi_bio_write_header_cleanup,
	.read_header_init = toi_bio_read_header_init,
	.read_header_cleanup = toi_bio_read_header_cleanup,
	.get_header_version = toi_bio_get_header_version,
	.remove_image = toi_bio_remove_image,
	.parse_sig_location = toi_bio_parse_sig_location,

	.sysfs_data = sysfs_params,
	.num_sysfs_entries = sizeof(sysfs_params) /
		sizeof(struct toi_sysfs_data),
};
/**
 * toi_block_io_load - load time routine for block I/O module
 *
 * Register block i/o ops and sysfs entries.
 **/
static __init int toi_block_io_load(void)
{
	return toi_register_module(&toi_blockwriter_ops);
}
#ifdef MODULE
static __exit void toi_block_io_unload(void)
{
	toi_unregister_module(&toi_blockwriter_ops);
}

module_init(toi_block_io_load);
module_exit(toi_block_io_unload);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Nigel Cunningham");
MODULE_DESCRIPTION("TuxOnIce block io functions");
#else
late_initcall(toi_block_io_load);
#endif