/*
 * kernel/power/tuxonice_io.c
 *
 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
 * Copyright (C) 1998,2001,2002 Pavel Machek <pavel@suse.cz>
 * Copyright (C) 2002-2003 Florent Chabaud <fchabaud@free.fr>
 * Copyright (C) 2002-2014 Nigel Cunningham (nigel at tuxonice net)
 *
 * This file is released under the GPLv2.
 *
 * It contains high level IO routines for hibernating.
 *
 */

#include <linux/suspend.h>
#include <linux/version.h>
#include <linux/utsname.h>
#include <linux/mount.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/cpu.h>
#include <linux/fs_struct.h>
#include <linux/bio.h>
#include <linux/fs_uuid.h>
#include <asm/tlbflush.h>

#include "tuxonice.h"
#include "tuxonice_modules.h"
#include "tuxonice_pageflags.h"
#include "tuxonice_io.h"
#include "tuxonice_ui.h"
#include "tuxonice_storage.h"
#include "tuxonice_prepare_image.h"
#include "tuxonice_extent.h"
#include "tuxonice_sysfs.h"
#include "tuxonice_builtin.h"
#include "tuxonice_checksum.h"
#include "tuxonice_alloc.h"

char alt_resume_param[256];

/* Version read from image header at resume */
static int toi_image_header_version;
#define read_if_version(VERS, VAR, DESC, ERR_ACT) do { \
	if (likely(toi_image_header_version >= VERS)) \
		if (toiActiveAllocator->rw_header_chunk(READ, NULL, \
				(char *) &VAR, sizeof(VAR))) { \
			abort_hibernate(TOI_FAILED_IO, "Failed to read DESC."); \
			ERR_ACT; \
		} \
} while (0)
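/*
 * Note: macro parameters are not substituted inside string literals, so the
 * DESC argument is not expanded above; the abort message always reads
 * literally "Failed to read DESC." whatever description is passed in.
 */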
/* Variables shared between threads and updated under the mutex */
static int io_write, io_finish_at, io_base, io_barmax, io_pageset, io_result;
static int io_index, io_nextupdate, io_pc, io_pc_step;
static DEFINE_MUTEX(io_mutex);
static DEFINE_PER_CPU(struct page *, last_sought);
static DEFINE_PER_CPU(struct page *, last_high_page);
static DEFINE_PER_CPU(char *, checksum_locn);
static DEFINE_PER_CPU(struct pbe *, last_low_page);
static atomic_t io_count;
atomic_t toi_io_workers;
EXPORT_SYMBOL_GPL(toi_io_workers);
static int using_flusher;

DECLARE_WAIT_QUEUE_HEAD(toi_io_queue_flusher);
EXPORT_SYMBOL_GPL(toi_io_queue_flusher);

int toi_bio_queue_flusher_should_finish;
EXPORT_SYMBOL_GPL(toi_bio_queue_flusher_should_finish);

int toi_max_workers;

static char *image_version_error = "The image header version is newer than this kernel supports.";

struct toi_module_ops *first_filter;

static atomic_t toi_num_other_threads;
static DECLARE_WAIT_QUEUE_HEAD(toi_worker_wait_queue);

enum toi_worker_commands {
	TOI_IO_WORKER_STOP,
	TOI_IO_WORKER_RUN,
	TOI_IO_WORKER_EXIT
};
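/*
 * Worker threads park in worker_rw_loop() and obey toi_worker_command:
 * STOP parks them on toi_worker_wait_queue, RUN sends them back into the
 * I/O loop, and EXIT makes them clean up and return.
 */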
static enum toi_worker_commands toi_worker_command;

/**
 * toi_attempt_to_parse_resume_device - determine if we can hibernate
 *
 * Can we hibernate, using the current resume= parameter?
 **/
int toi_attempt_to_parse_resume_device(int quiet)
{
	struct list_head *Allocator;
	struct toi_module_ops *thisAllocator;
	int result, returning = 0;

	if (toi_activate_storage(0))
		return 0;

	toiActiveAllocator = NULL;
	clear_toi_state(TOI_RESUME_DEVICE_OK);
	clear_toi_state(TOI_CAN_RESUME);
	clear_result_state(TOI_ABORTED);

	if (!toiNumAllocators) {
		if (!quiet)
			pr_warn("TuxOnIce: No storage allocators have been registered. Hibernating will be disabled");
		goto cleanup;
	}

	list_for_each(Allocator, &toiAllocators) {
		thisAllocator = list_entry(Allocator, struct toi_module_ops, type_list);

		/*
		 * Not sure why you'd want to disable an allocator, but
		 * we should honour the flag if we're providing it
		 */
		if (!thisAllocator->enabled)
			continue;

		result = thisAllocator->parse_sig_location(resume_file, (toiNumAllocators == 1),
							   quiet);

		switch (result) {
		case -EINVAL:
			/* For this allocator, but not a valid
			 * configuration. Error already printed. */
			goto cleanup;
		case 0:
			/* For this allocator and valid. */
			toiActiveAllocator = thisAllocator;
			set_toi_state(TOI_RESUME_DEVICE_OK);
			set_toi_state(TOI_CAN_RESUME);
			returning = 1;
			goto cleanup;
		}
	}

	if (!quiet)
		pr_warn("TuxOnIce: No matching enabled allocator found. Resuming disabled");

cleanup:
	toi_deactivate_storage(0);
	return returning;
}
EXPORT_SYMBOL_GPL(toi_attempt_to_parse_resume_device);

void attempt_to_parse_resume_device2(void)
{
	toi_prepare_usm();
	toi_attempt_to_parse_resume_device(0);
	toi_cleanup_usm();
}
EXPORT_SYMBOL_GPL(attempt_to_parse_resume_device2);

void save_restore_alt_param(int replace, int quiet)
{
	static char resume_param_save[256];
	static unsigned long toi_state_save;

	if (replace) {
		toi_state_save = toi_state;
		strcpy(resume_param_save, resume_file);
		strcpy(resume_file, alt_resume_param);
	} else {
		strcpy(resume_file, resume_param_save);
		toi_state = toi_state_save;
	}
	toi_attempt_to_parse_resume_device(quiet);
}

void attempt_to_parse_alt_resume_param(void)
{
	int ok = 0;

	/* Temporarily set resume_param to the poweroff value */
	if (!strlen(alt_resume_param))
		return;

	pr_warn("=== Trying Poweroff Resume2 ===\n");
	save_restore_alt_param(SAVE, NOQUIET);
	if (test_toi_state(TOI_CAN_RESUME))
		ok = 1;

	pr_warn("=== Done ===\n");
	save_restore_alt_param(RESTORE, QUIET);

	/* If not ok, clear the string */
	if (ok)
		return;

	pr_warn("Can't resume from that location; clearing alt_resume_param.\n");
	alt_resume_param[0] = '\0';
}
/**
 * noresume_reset_modules - reset data structures in case of non resuming
 *
 * When we read the start of an image, modules (and especially the
 * active allocator) might need to reset data structures if we
 * decide to remove the image rather than resuming from it.
 **/
static void noresume_reset_modules(void)
{
	struct toi_module_ops *this_filter;

	list_for_each_entry(this_filter, &toi_filters, type_list)
		if (this_filter->noresume_reset)
			this_filter->noresume_reset();

	if (toiActiveAllocator && toiActiveAllocator->noresume_reset)
		toiActiveAllocator->noresume_reset();
}

/**
 * fill_toi_header - fill the hibernate header structure
 * @sh: Header data structure to be filled.
 **/
static int fill_toi_header(struct toi_header *sh)
{
	int i, error;

	error = init_header((struct swsusp_info *)sh);
	if (error)
		return error;

	sh->pagedir = pagedir1;
	sh->pageset_2_size = pagedir2.size;
	sh->param0 = toi_result;
	sh->param1 = toi_bkd.toi_action;
	sh->param2 = toi_bkd.toi_debug_state;
	sh->param3 = toi_bkd.toi_default_console_level;
	sh->root_fs = current->fs->root.mnt->mnt_sb->s_dev;
	for (i = 0; i < 4; i++)
		sh->io_time[i / 2][i % 2] = toi_bkd.toi_io_time[i / 2][i % 2];
	sh->bkd = boot_kernel_data_buffer;
	return 0;
}

/**
 * rw_init_modules - initialize modules
 * @rw: Whether we are reading or writing an image.
 * @which: Section of the image being processed.
 *
 * Iterate over modules, preparing the ones that will be used to read or write
 * data.
 **/
static int rw_init_modules(int rw, int which)
{
	struct toi_module_ops *this_module;

	/* Initialise page transformers */
	list_for_each_entry(this_module, &toi_filters, type_list) {
		if (!this_module->enabled)
			continue;
		if (this_module->rw_init && this_module->rw_init(rw, which)) {
			abort_hibernate(TOI_FAILED_MODULE_INIT,
					"Failed to initialize the %s filter.", this_module->name);
			return 1;
		}
	}

	/* Initialise allocator */
	if (toiActiveAllocator->rw_init(rw, which)) {
		abort_hibernate(TOI_FAILED_MODULE_INIT, "Failed to initialise the allocator.");
		return 1;
	}

	/* Initialise other modules */
	list_for_each_entry(this_module, &toi_modules, module_list) {
		if (!this_module->enabled ||
		    this_module->type == FILTER_MODULE || this_module->type == WRITER_MODULE)
			continue;
		if (this_module->rw_init && this_module->rw_init(rw, which)) {
			set_abort_result(TOI_FAILED_MODULE_INIT);
			pr_warn("Setting aborted flag due to module init failure.\n");
			return 1;
		}
	}

	return 0;
}
/**
 * rw_cleanup_modules - cleanup modules
 * @rw: Whether we are reading or writing an image.
 *
 * Cleanup components after reading or writing a set of pages.
 * Only the allocator may fail.
 **/
static int rw_cleanup_modules(int rw)
{
	struct toi_module_ops *this_module;
	int result = 0;

	/* Cleanup other modules */
	list_for_each_entry(this_module, &toi_modules, module_list) {
		if (!this_module->enabled ||
		    this_module->type == FILTER_MODULE || this_module->type == WRITER_MODULE)
			continue;
		if (this_module->rw_cleanup)
			result |= this_module->rw_cleanup(rw);
	}

	/* Flush data and cleanup */
	list_for_each_entry(this_module, &toi_filters, type_list) {
		if (!this_module->enabled)
			continue;
		if (this_module->rw_cleanup)
			result |= this_module->rw_cleanup(rw);
	}

	result |= toiActiveAllocator->rw_cleanup(rw);

	return result;
}
static struct page *copy_page_from_orig_page(struct page *orig_page, int is_high)
{
	int index, min, max;
	struct page *high_page = NULL,
		    **my_last_high_page = &__get_cpu_var(last_high_page),
		    **my_last_sought = &__get_cpu_var(last_sought);
	struct pbe *this, **my_last_low_page = &__get_cpu_var(last_low_page);
	void *compare;

	if (is_high) {
		if (*my_last_sought && *my_last_high_page && *my_last_sought < orig_page)
			high_page = *my_last_high_page;
		else
			high_page = (struct page *)restore_highmem_pblist;
		this = (struct pbe *)kmap(high_page);
		compare = orig_page;
	} else {
		if (*my_last_sought && *my_last_low_page && *my_last_sought < orig_page)
			this = *my_last_low_page;
		else
			this = restore_pblist;
		compare = page_address(orig_page);
	}

	*my_last_sought = orig_page;

	/* Locate page containing pbe */
	while (this[PBES_PER_PAGE - 1].next && this[PBES_PER_PAGE - 1].orig_address < compare) {
		if (is_high) {
			struct page *next_high_page = (struct page *)
				this[PBES_PER_PAGE - 1].next;
			kunmap(high_page);
			this = kmap(next_high_page);
			high_page = next_high_page;
		} else
			this = this[PBES_PER_PAGE - 1].next;
	}
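	/*
	 * The pbe entries within (and across) these pages are assumed to be
	 * sorted by orig_address; both the page walk above and the binary
	 * search below rely on that ordering.
	 */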
	/* Do a binary search within the page */
	min = 0;
	max = PBES_PER_PAGE;
	index = PBES_PER_PAGE / 2;
	while (max - min) {
		if (this[index].orig_address == compare) {
			if (is_high) {
				struct page *page = this[index].address;
				*my_last_high_page = high_page;
				kunmap(high_page);
				return page;
			}
			*my_last_low_page = this;
			return virt_to_page(this[index].address);
		} else if (!this[index].orig_address || this[index].orig_address > compare)
			max = index;
		else
			min = index;
		index = ((max + min) / 2);
	}

	if (is_high)
		kunmap(high_page);

	abort_hibernate(TOI_FAILED_IO, "Failed to get dest. for orig page %p. This[min].orig_address=%p",
			orig_page, this[index].orig_address);
	return NULL;
}
/**
 * write_next_page - write the next page in a pageset
 * @data_pfn: The pfn where the next data to write is located.
 * @my_io_index: The index of the page in the pageset.
 * @write_pfn: The pfn number to write in the image (where the data belongs).
 *
 * Get the pfn of the next page to write, map the page if necessary and do the
 * write.
 **/
static int write_next_page(unsigned long *data_pfn, int *my_io_index, unsigned long *write_pfn)
{
	struct page *page;
	char **my_checksum_locn = &__get_cpu_var(checksum_locn);
	int result = 0, was_present;

	*data_pfn = memory_bm_next_pfn(io_map, 0);

	/* Another thread could have beaten us to it. */
	if (*data_pfn == BM_END_OF_MAP) {
		if (atomic_read(&io_count)) {
			pr_warn("Ran out of pfns but io_count is still %d.\n",
				atomic_read(&io_count));
			BUG();
		}
		mutex_unlock(&io_mutex);
		return -ENODATA;
	}

	*my_io_index = io_finish_at - atomic_sub_return(1, &io_count);

	memory_bm_clear_bit(io_map, 0, *data_pfn);
	page = pfn_to_page(*data_pfn);

	was_present = kernel_page_present(page);
	if (!was_present)
		kernel_map_pages(page, 1, 1);

	if (io_pageset == 1)
		*write_pfn = memory_bm_next_pfn(pageset1_map, 0);
	else {
		*write_pfn = *data_pfn;
		*my_checksum_locn = tuxonice_get_next_checksum();
	}
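	/*
	 * Pageset 2 pages are written in place (*write_pfn == *data_pfn) and
	 * get a checksum slot so that the checksum code can later detect
	 * pages whose contents changed after being written.
	 */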
	TOI_TRACE_DEBUG(*data_pfn, "_PS%d_write %d", io_pageset, *my_io_index);

	mutex_unlock(&io_mutex);

	if (io_pageset == 2 && tuxonice_calc_checksum(page, *my_checksum_locn))
		return 1;

	result = first_filter->write_page(*write_pfn, TOI_PAGE, page, PAGE_SIZE);

	if (!was_present)
		kernel_map_pages(page, 1, 0);

	return result;
}
/**
 * read_next_page - read the next page in a pageset
 * @my_io_index: The index of the page in the pageset.
 * @write_pfn: The pfn in which the data belongs.
 *
 * Read a page of the image into our buffer. It can happen (here and in the
 * write routine) that threads don't get run until after other CPUs have done
 * all the work. This was the cause of the long standing issue with
 * occasionally getting -ENODATA errors at the end of reading the image. We
 * therefore need to check there's actually a page to read before trying to
 * retrieve one.
 **/
static int read_next_page(int *my_io_index, unsigned long *write_pfn, struct page *buffer)
{
	unsigned int buf_size = PAGE_SIZE;
	unsigned long left = atomic_read(&io_count);

	if (!left)
		return -ENODATA;

	/* Start off assuming the page we read isn't resaved */
	*my_io_index = io_finish_at - atomic_sub_return(1, &io_count);

	mutex_unlock(&io_mutex);

	/*
	 * Are we aborting? If so, don't submit any more I/O as
	 * resetting the resume_attempted flag (from ui.c) will
	 * clear the bdev flags, making this thread oops.
	 */
	if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
		atomic_dec(&toi_io_workers);
		if (!atomic_read(&toi_io_workers)) {
			/*
			 * So we can be sure we'll have memory for
			 * marking that we haven't resumed.
			 */
			rw_cleanup_modules(READ);
			set_toi_state(TOI_IO_STOPPED);
		}
		while (1)
			schedule();
	}

	/*
	 * See toi_bio_read_page in tuxonice_bio.c:
	 * read the next page in the image.
	 */
	return first_filter->read_page(write_pfn, TOI_PAGE, buffer, &buf_size);
}
static void use_read_page(unsigned long write_pfn, struct page *buffer)
{
	struct page *final_page = pfn_to_page(write_pfn), *copy_page = final_page;
	char *virt, *buffer_virt;
	int was_present, cpu = smp_processor_id();
	unsigned long idx = 0;

	if (io_pageset == 1 && (!pageset1_copy_map ||
				!memory_bm_test_bit(pageset1_copy_map, cpu, write_pfn))) {
		int is_high = PageHighMem(final_page);
		copy_page =
		    copy_page_from_orig_page(is_high ? (void *)write_pfn : final_page, is_high);
	}

	if (!memory_bm_test_bit(io_map, cpu, write_pfn)) {
		int test = !memory_bm_test_bit(io_map, cpu, write_pfn);
		toi_message(TOI_IO, TOI_VERBOSE, 0, "Discard %ld (%d).", write_pfn, test);
		mutex_lock(&io_mutex);
		idx = atomic_add_return(1, &io_count);
		mutex_unlock(&io_mutex);
		return;
	}

	virt = kmap(copy_page);
	buffer_virt = kmap(buffer);
	was_present = kernel_page_present(copy_page);
	if (!was_present)
		kernel_map_pages(copy_page, 1, 1);
	memcpy(virt, buffer_virt, PAGE_SIZE);
	if (!was_present)
		kernel_map_pages(copy_page, 1, 0);
	kunmap(copy_page);
	kunmap(buffer);
	memory_bm_clear_bit(io_map, cpu, write_pfn);
	TOI_TRACE_DEBUG(write_pfn, "_PS%d_read", io_pageset);
}
static unsigned long status_update(int writing, unsigned long done, unsigned long ticks)
{
	int cs_index = writing ? 0 : 1;
	unsigned long ticks_so_far = toi_bkd.toi_io_time[cs_index][1] + ticks;
	unsigned long msec = jiffies_to_msecs(abs(ticks_so_far));
	unsigned long pgs_per_s, estimate = 0, pages_left;

	if (msec) {
		pages_left = io_barmax - done;
		pgs_per_s = 1000 * done / msec;
		if (pgs_per_s)
			estimate = DIV_ROUND_UP(pages_left, pgs_per_s);
	}
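	/*
	 * Illustrative numbers only: 10000 pages done in 5000 ms gives
	 * pgs_per_s = 1000 * 10000 / 5000 = 2000; with 30000 pages left the
	 * estimate is DIV_ROUND_UP(30000, 2000) = 15 seconds.
	 */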
	if (estimate && ticks > HZ / 2)
		return toi_update_status(done, io_barmax,
					 " %d/%d MB (%lu sec left)",
					 MB(done + 1), MB(io_barmax), estimate);

	return toi_update_status(done, io_barmax, " %d/%d MB", MB(done + 1), MB(io_barmax));
}
/**
 * worker_rw_loop - main loop to read/write pages
 *
 * The main I/O loop for reading or writing pages. The io_map bitmap is used to
 * track the pages to read/write.
 * If we are reading, the pages are loaded to their final (mapped) pfn.
 * Data is non zero iff this is a thread started via start_other_threads.
 * In that case, we stay in here until told to quit.
 **/
static int worker_rw_loop(void *data)
{
	unsigned long data_pfn, write_pfn, next_jiffies = jiffies + HZ / 4,
		      jif_index = 1, start_time = jiffies, thread_num;
	int result = 0, my_io_index = 0, last_worker;
	struct page *buffer = toi_alloc_page(28, TOI_ATOMIC_GFP);
	cpumask_var_t orig_mask;

	if (!alloc_cpumask_var(&orig_mask, GFP_KERNEL)) {
		pr_err("Failed to allocate cpumask for TuxOnIce I/O thread %ld.\n",
		       (unsigned long)data);
		return -ENOMEM;
	}

	cpumask_copy(orig_mask, tsk_cpus_allowed(current));

	current->flags |= PF_NOFREEZE;

top:
	mutex_lock(&io_mutex);
	thread_num = atomic_read(&toi_io_workers);

	cpumask_copy(tsk_cpus_allowed(current), orig_mask);
	schedule();

	atomic_inc(&toi_io_workers);

	while (atomic_read(&io_count) >= atomic_read(&toi_io_workers) &&
	       !(io_write && test_result_state(TOI_ABORTED)) &&
	       toi_worker_command == TOI_IO_WORKER_RUN) {
		if (!thread_num && time_after(jiffies, next_jiffies)) {
			next_jiffies += HZ / 4;
			if (toiActiveAllocator->update_throughput_throttle)
				toiActiveAllocator->update_throughput_throttle(jif_index);
			jif_index++;
		}

		/*
		 * What page to use? If reading, don't know yet which page's
		 * data will be read, so always use the buffer. If writing,
		 * use the copy (Pageset1) or original page (Pageset2), but
		 * always write the pfn of the original page.
		 */
		if (io_write)
			result = write_next_page(&data_pfn, &my_io_index, &write_pfn);
		else /* Reading */
			result = read_next_page(&my_io_index, &write_pfn, buffer);

		if (result) {
			mutex_lock(&io_mutex);
			/* Nothing to do? */
			if (result == -ENODATA) {
				toi_message(TOI_IO, TOI_VERBOSE, 0,
					    "Thread %d has no more work.", smp_processor_id());
				break;
			}

			io_result = result;

			if (io_write) {
				pr_warn("Write chunk returned %d.\n", result);
				abort_hibernate(TOI_FAILED_IO,
						"Failed to write a chunk of the image.");
				break;
			}

			if (io_pageset == 1) {
				pr_err("\nBreaking out of I/O loop because of result code %d.\n", result);
				break;
			}
			panic("Read chunk returned (%d)", result);
		}

		/*
		 * Discard reads of resaved pages while reading ps2
		 * and unwanted pages while rereading ps2 when aborting.
		 */
		if (!io_write) {
			if (!PageResave(pfn_to_page(write_pfn)))
				use_read_page(write_pfn, buffer);
			else {
				mutex_lock(&io_mutex);
				toi_message(TOI_IO, TOI_VERBOSE, 0, "Resaved %ld.", write_pfn);
				atomic_inc(&io_count);
				mutex_unlock(&io_mutex);
			}
		}

		if (!thread_num) {
			if (my_io_index + io_base > io_nextupdate)
				io_nextupdate = status_update(io_write,
							      my_io_index + io_base,
							      jiffies - start_time);

			if (my_io_index > io_pc) {
				pr_warn("...%d%%", 20 * io_pc_step);
				io_pc_step++;
				io_pc = io_finish_at * io_pc_step / 5;
			}
		}

		toi_cond_pause(0, NULL);

		/*
		 * Subtle: If there's less I/O still to be done than threads
		 * running, quit. This stops us doing I/O beyond the end of
		 * the image when reading.
		 *
		 * Possible race condition. Two threads could do the test at
		 * the same time; one should exit and one should continue.
		 * Therefore we take the mutex before comparing and exiting.
		 */
		mutex_lock(&io_mutex);
	}

	last_worker = atomic_dec_and_test(&toi_io_workers);
	toi_message(TOI_IO, TOI_VERBOSE, 0, "%d workers left.", atomic_read(&toi_io_workers));
	mutex_unlock(&io_mutex);

	if ((unsigned long)data && toi_worker_command != TOI_IO_WORKER_EXIT) {
		/* Were we the last thread and we're using a flusher thread? */
		if (last_worker && using_flusher)
			toiActiveAllocator->finish_all_io();

		/* First, if we're doing I/O, wait for it to finish */
		wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_RUN);
		/* Then wait to be told what to do next */
		wait_event(toi_worker_wait_queue, toi_worker_command != TOI_IO_WORKER_STOP);
		if (toi_worker_command == TOI_IO_WORKER_RUN)
			goto top;
	}

	if (thread_num)
		atomic_dec(&toi_num_other_threads);

	toi_message(TOI_IO, TOI_LOW, 0, "Thread %lu exiting.", thread_num);
	toi__free_page(28, buffer);
	free_cpumask_var(orig_mask);

	return result;
}
int toi_start_other_threads(void)
{
	int cpu;
	struct task_struct *p;
	int to_start = (toi_max_workers ? toi_max_workers : num_online_cpus()) - 1;
	unsigned long num_started = 0;

	if (test_action_state(TOI_NO_MULTITHREADED_IO))
		return 0;

	toi_worker_command = TOI_IO_WORKER_STOP;

	for_each_online_cpu(cpu) {
		if (num_started == to_start)
			break;

		if (cpu == smp_processor_id())
			continue;

		p = kthread_create_on_node(worker_rw_loop, (void *)num_started + 1,
					   cpu_to_node(cpu), "ktoi_io/%d", cpu);
		if (IS_ERR(p)) {
			pr_err("ktoi_io for %i failed\n", cpu);
			continue;
		}
		kthread_bind(p, cpu);
		p->flags |= PF_MEMALLOC;
		wake_up_process(p);
		num_started++;
		atomic_inc(&toi_num_other_threads);
	}

	hib_warn("Started %ld threads.", num_started);
	toi_message(TOI_IO, TOI_LOW, 0, "Started %lu threads.", num_started);
	return num_started;
}

void toi_stop_other_threads(void)
{
	toi_message(TOI_IO, TOI_LOW, 0, "Stopping other threads.");
	toi_worker_command = TOI_IO_WORKER_EXIT;
	wake_up(&toi_worker_wait_queue);
}
/**
 * do_rw_loop - main highlevel function for reading or writing pages
 *
 * Create the io_map bitmap and call worker_rw_loop to perform I/O operations.
 **/
static int do_rw_loop(int write, int finish_at, struct memory_bitmap *pageflags,
		      int base, int barmax, int pageset)
{
	int index = 0, cpu, result = 0, workers_started;
	unsigned long pfn, next;

	first_filter = toi_get_next_filter(NULL);

	if (!finish_at)
		return 0;

	io_write = write;
	io_finish_at = finish_at;
	io_base = base;
	io_barmax = barmax;
	io_pageset = pageset;
	io_index = 0;
	io_pc = io_finish_at / 5;
	io_pc_step = 1;
	io_result = 0;
	io_nextupdate = base + 1;
	toi_bio_queue_flusher_should_finish = 0;

	for_each_online_cpu(cpu) {
		per_cpu(last_sought, cpu) = NULL;
		per_cpu(last_low_page, cpu) = NULL;
		per_cpu(last_high_page, cpu) = NULL;
	}

	/* Ensure all bits clear */
	memory_bm_clear(io_map);
	memory_bm_position_reset(io_map);
	next = memory_bm_next_pfn(io_map, 0);
	BUG_ON(next != BM_END_OF_MAP);

	/* Set the bits for the pages to write */
	memory_bm_position_reset(pageflags);
	pfn = memory_bm_next_pfn(pageflags, 0);
	toi_trace_index++;

	while (pfn != BM_END_OF_MAP && index < finish_at) {
		TOI_TRACE_DEBUG(pfn, "_io_pageset_%d (%d/%d)", pageset, index + 1, finish_at);
		memory_bm_set_bit(io_map, 0, pfn);
		pfn = memory_bm_next_pfn(pageflags, 0);
		index++;
	}

	BUG_ON(next != BM_END_OF_MAP || index < finish_at);

	memory_bm_position_reset(io_map);
	toi_trace_index++;

	atomic_set(&io_count, finish_at);

	memory_bm_position_reset(pageset1_map);

	mutex_lock(&io_mutex);

	clear_toi_state(TOI_IO_STOPPED);

	using_flusher = (atomic_read(&toi_num_other_threads) &&
			 toiActiveAllocator->io_flusher &&
			 !test_action_state(TOI_NO_FLUSHER_THREAD));

	workers_started = atomic_read(&toi_num_other_threads);

	memory_bm_position_reset(io_map);
	memory_bm_position_reset(pageset1_copy_map);

	toi_worker_command = TOI_IO_WORKER_RUN;
	wake_up(&toi_worker_wait_queue);

	mutex_unlock(&io_mutex);

	if (using_flusher)
		result = toiActiveAllocator->io_flusher(write);
	else
		worker_rw_loop(NULL);

	while (atomic_read(&toi_io_workers))
		schedule();

	pr_warn("\n");

	toi_worker_command = TOI_IO_WORKER_STOP;
	wake_up(&toi_worker_wait_queue);

	if (unlikely(test_toi_state(TOI_STOP_RESUME))) {
		if (!atomic_read(&toi_io_workers)) {
			rw_cleanup_modules(READ);
			set_toi_state(TOI_IO_STOPPED);
		}
		while (1)
			schedule();
	}
	set_toi_state(TOI_IO_STOPPED);

	if (!io_result && !result && !test_result_state(TOI_ABORTED)) {
		unsigned long next;

		toi_update_status(io_base + io_finish_at, io_barmax,
				  " %d/%d MB ", MB(io_base + io_finish_at), MB(io_barmax));

		memory_bm_position_reset(io_map);
		next = memory_bm_next_pfn(io_map, 0);
		if (next != BM_END_OF_MAP) {
			pr_warn("Finished I/O loop but still work to do?\nFinish at = %d. io_count = %d.\n",
				finish_at, atomic_read(&io_count));
			pr_warn("I/O bitmap still records work to do. %ld.\n", next);
#ifdef CONFIG_TOI_FIXUP
			HIB_SHOW_MEMINFO();
#endif
			BUG();
			do {
				cpu_relax();
			} while (0);
		}
	}

	return io_result ? io_result : result;
}
/**
 * write_pageset - write a pageset to disk.
 * @pagedir: Which pagedir to write.
 *
 * Returns:
 * Zero on success or -1 on failure.
 **/
int write_pageset(struct pagedir *pagedir)
{
	int finish_at, base = 0;
	int barmax = pagedir1.size + pagedir2.size;
	long error = 0;
	struct memory_bitmap *pageflags;
	unsigned long start_time, end_time;

	/*
	 * Even if there is nothing to read or write, the allocator
	 * may need the init/cleanup for its housekeeping. (eg:
	 * Pageset1 may start where pageset2 ends when writing).
	 */
	finish_at = pagedir->size;

	if (pagedir->id == 1) {
		hib_log("starting to write kernel & process data...\n");
		toi_prepare_status(DONT_CLEAR_BAR, "Writing kernel & process data...");
		base = pagedir2.size;
		if (test_action_state(TOI_TEST_FILTER_SPEED) || test_action_state(TOI_TEST_BIO))
			pageflags = pageset1_map;
		else
			pageflags = pageset1_copy_map;
	} else {
		hib_log("starting to write caches...\n");
		toi_prepare_status(DONT_CLEAR_BAR, "Writing caches...");
		pageflags = pageset2_map;
	}

	start_time = jiffies;

	if (rw_init_modules(WRITE, pagedir->id)) {
		abort_hibernate(TOI_FAILED_MODULE_INIT,
				"Failed to initialise modules for writing.");
		error = 1;
	}

	if (!error)
		error = do_rw_loop(WRITE, finish_at, pageflags, base, barmax, pagedir->id);

	if (rw_cleanup_modules(WRITE) && !error) {
		abort_hibernate(TOI_FAILED_MODULE_CLEANUP, "Failed to cleanup after writing.");
		error = 1;
	}

	end_time = jiffies;

	if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
		toi_bkd.toi_io_time[0][0] += finish_at,
		    toi_bkd.toi_io_time[0][1] += (end_time - start_time);
	}

	hib_log("@line:%d return value(%ld)\n", __LINE__, error);
	return error;
}
/**
 * read_pageset - highlevel function to read a pageset from disk
 * @pagedir: pageset to read
 * @overwrittenpagesonly: Whether to read the whole pageset or
 * only part of it.
 *
 * Returns:
 * Zero on success or -1 on failure.
 **/
static int read_pageset(struct pagedir *pagedir, int overwrittenpagesonly)
{
	int result = 0, base = 0;
	int finish_at = pagedir->size;
	int barmax = pagedir1.size + pagedir2.size;
	struct memory_bitmap *pageflags;
	unsigned long start_time, end_time;

	if (pagedir->id == 1) {
		toi_prepare_status(DONT_CLEAR_BAR, "Reading kernel & process data...");
		pageflags = pageset1_map;
	} else {
		toi_prepare_status(DONT_CLEAR_BAR, "Reading caches...");
		if (overwrittenpagesonly) {
			barmax = min(pagedir1.size, pagedir2.size);
			finish_at = min(pagedir1.size, pagedir2.size);
		} else
			base = pagedir1.size;
		pageflags = pageset2_map;
	}

	start_time = jiffies;

	if (rw_init_modules(READ, pagedir->id)) {
		toiActiveAllocator->remove_image();
		result = 1;
	} else
		result = do_rw_loop(READ, finish_at, pageflags, base, barmax, pagedir->id);

	if (rw_cleanup_modules(READ) && !result) {
		abort_hibernate(TOI_FAILED_MODULE_CLEANUP, "Failed to cleanup after reading.");
		result = 1;
	}

	/* Statistics */
	end_time = jiffies;

	if ((end_time - start_time) && (!test_result_state(TOI_ABORTED))) {
		toi_bkd.toi_io_time[1][0] += finish_at,
		    toi_bkd.toi_io_time[1][1] += (end_time - start_time);
	}

	return result;
}
/**
 * write_module_configs - store the modules configuration
 *
 * The configuration for each module is stored in the image header.
 * Returns: Int
 * Zero on success, Error value otherwise.
 **/
static int write_module_configs(void)
{
	struct toi_module_ops *this_module;
	char *buffer = (char *)toi_get_zeroed_page(22, TOI_ATOMIC_GFP);
	int len, index = 1;
	struct toi_module_header toi_module_header;

	if (!buffer) {
		pr_warn("Failed to allocate a buffer for saving module configuration info");
		return -ENOMEM;
	}

	/*
	 * We have to know which data goes with which module, so we at
	 * least write a length of zero for a module. Note that we are
	 * also assuming every module's config data takes <= PAGE_SIZE.
	 */

	/* For each module (in registration order) */
	list_for_each_entry(this_module, &toi_modules, module_list) {
		if (!this_module->enabled || !this_module->storage_needed ||
		    (this_module->type == WRITER_MODULE && toiActiveAllocator != this_module))
			continue;

		/* Get the data from the module */
		len = 0;
		if (this_module->save_config_info)
			len = this_module->save_config_info(buffer);

		/* Save the details of the module */
		toi_module_header.enabled = this_module->enabled;
		toi_module_header.type = this_module->type;
		toi_module_header.index = index++;
		strncpy(toi_module_header.name, this_module->name, sizeof(toi_module_header.name));
		toiActiveAllocator->rw_header_chunk(WRITE,
						    this_module,
						    (char *)&toi_module_header,
						    sizeof(toi_module_header));

		/* Save the size of the data and any data returned */
		toiActiveAllocator->rw_header_chunk(WRITE, this_module, (char *)&len, sizeof(int));
		if (len)
			toiActiveAllocator->rw_header_chunk(WRITE, this_module, buffer, len);
	}

	/* Write a blank header to terminate the list */
	toi_module_header.name[0] = '\0';
	toiActiveAllocator->rw_header_chunk(WRITE, NULL,
					    (char *)&toi_module_header, sizeof(toi_module_header));

	toi_free_page(22, (unsigned long)buffer);
	return 0;
}
/**
 * read_one_module_config - read and configure one module
 *
 * Read the configuration for one module, and configure the module
 * to match if it is loaded.
 *
 * Returns: Int
 * Zero on success, Error value otherwise.
 **/
static int read_one_module_config(struct toi_module_header *header)
{
	struct toi_module_ops *this_module;
	int result, len;
	char *buffer;

	/* Find the module */
	this_module = toi_find_module_given_name(header->name);

	if (!this_module) {
		if (header->enabled) {
			toi_early_boot_message(1, TOI_CONTINUE_REQ,
					       "need module %s for reading the image but it hasn't been registered",
					       header->name);
			if (!(test_toi_state(TOI_CONTINUE_REQ)))
				return -EINVAL;
		} else
			pr_warn("Module %s config data found, but the module is not registered and was disabled. Ignoring its data.",
				header->name);
	}

	/* Get the length of the data (if any) */
	result = toiActiveAllocator->rw_header_chunk(READ, NULL, (char *)&len, sizeof(int));
	if (result) {
		pr_err("Failed to read the length of the module %s's configuration data", header->name);
		return -EINVAL;
	}

	/* Read any data and pass to the module (if we found one) */
	if (!len)
		return 0;

	buffer = (char *)toi_get_zeroed_page(23, TOI_ATOMIC_GFP);
	if (!buffer) {
		pr_err("Failed to allocate a buffer for reloading module configuration info");
		return -ENOMEM;
	}

	toiActiveAllocator->rw_header_chunk(READ, NULL, buffer, len);

	if (!this_module)
		goto out;

	if (!this_module->save_config_info)
		pr_err("Huh? Module %s appears to have a save_config_info, but not a load_config_info function",
		       this_module->name);
	else
		this_module->load_config_info(buffer, len);

	/*
	 * Now move this module to the tail of its lists. This will put it in
	 * order. Any new modules will end up at the top of the lists. They
	 * should have been set to disabled when loaded (people will
	 * normally not edit an initrd to load a new module and then hibernate
	 * without using it!).
	 */
	toi_move_module_tail(this_module);

	this_module->enabled = header->enabled;

out:
	toi_free_page(23, (unsigned long)buffer);
	return 0;
}
/**
 * read_module_configs - reload module configurations from the image header.
 *
 * Returns: Int
 * Zero on success or an error code.
 **/
static int read_module_configs(void)
{
	int result = 0;
	struct toi_module_header toi_module_header;
	struct toi_module_ops *this_module;

	/* All modules are initially disabled. That way, if we have a module
	 * loaded now that wasn't loaded when we hibernated, it won't be used
	 * in trying to read the data.
	 */
	list_for_each_entry(this_module, &toi_modules, module_list)
		this_module->enabled = 0;

	/* Get the first module header */
	result = toiActiveAllocator->rw_header_chunk(READ, NULL,
						     (char *)&toi_module_header,
						     sizeof(toi_module_header));
	if (result) {
		pr_err("Failed to read the next module header.\n");
		return -EINVAL;
	}

	/* For each module (in registration order) */
	while (toi_module_header.name[0]) {
		result = read_one_module_config(&toi_module_header);

		if (result)
			return -EINVAL;

		/* Get the next module header */
		result = toiActiveAllocator->rw_header_chunk(READ, NULL,
							     (char *)&toi_module_header,
							     sizeof(toi_module_header));
		if (result) {
			pr_err("Failed to read the next module header.\n");
			return -EINVAL;
		}
	}

	return 0;
}

static inline int save_fs_info(struct fs_info *fs, struct block_device *bdev)
{
#ifdef CONFIG_TOI_ENHANCE
	char buf[BDEVNAME_SIZE];

	bdevname(bdev, buf);
	if (!toi_ignore_late_initcall() && strstr(buf, "dm-"))
		return 0;
#endif
	return (!fs || IS_ERR(fs) || !fs->last_mount_size) ? 0 : 1;
}
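/*
 * For each filesystem that save_fs_info() accepts, the header stores a
 * record of: 16 bytes of uuid, the dev_t, an int giving the last mount
 * data size, then that many bytes of last mount data. fs_info_space_needed()
 * below sizes the header accordingly, and fs_info_load_and_check_one()
 * reads the record back in the same order at resume time.
 */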
int fs_info_space_needed(void)
{
	const struct super_block *sb;
	int result = sizeof(int);

	list_for_each_entry(sb, &super_blocks, s_list) {
		struct fs_info *fs;

		if (!sb->s_bdev)
			continue;

		fs = fs_info_from_block_dev(sb->s_bdev);
		if (save_fs_info(fs, sb->s_bdev))
			result += 16 + sizeof(dev_t) + sizeof(int) + fs->last_mount_size;
		free_fs_info(fs);
	}
	return result;
}

static int fs_info_num_to_save(void)
{
	const struct super_block *sb;
	int to_save = 0;

	list_for_each_entry(sb, &super_blocks, s_list) {
		struct fs_info *fs;

		if (!sb->s_bdev)
			continue;

		fs = fs_info_from_block_dev(sb->s_bdev);
		if (save_fs_info(fs, sb->s_bdev))
			to_save++;
		free_fs_info(fs);
	}

	return to_save;
}

static int fs_info_save(void)
{
	const struct super_block *sb;
	int to_save = fs_info_num_to_save();

	if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, (char *)&to_save, sizeof(int))) {
		abort_hibernate(TOI_FAILED_IO, "Failed to write num fs_info to save.");
		return -EIO;
	}

	list_for_each_entry(sb, &super_blocks, s_list) {
		struct fs_info *fs;

		if (!sb->s_bdev)
			continue;

		fs = fs_info_from_block_dev(sb->s_bdev);
		if (save_fs_info(fs, sb->s_bdev)) {
			if (toiActiveAllocator->rw_header_chunk(WRITE, NULL, &fs->uuid[0], 16)) {
				abort_hibernate(TOI_FAILED_IO, "Failed to write uuid.");
				return -EIO;
			}
			if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
								(char *)&fs->dev_t,
								sizeof(dev_t))) {
				abort_hibernate(TOI_FAILED_IO, "Failed to write dev_t.");
				return -EIO;
			}
			if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
								(char *)&fs->last_mount_size,
								sizeof(int))) {
				abort_hibernate(TOI_FAILED_IO,
						"Failed to write last mount length.");
				return -EIO;
			}
			if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
								fs->last_mount,
								fs->last_mount_size)) {
				abort_hibernate(TOI_FAILED_IO, "Failed to write last mount data.");
				return -EIO;
			}
		}
		free_fs_info(fs);
	}
	return 0;
}
static int fs_info_load_and_check_one(void)
{
	char uuid[16], *last_mount;
	int result = 0, ln;
	dev_t dev_t;
	struct block_device *dev;
	struct fs_info *fs_info, seek;

	if (toiActiveAllocator->rw_header_chunk(READ, NULL, uuid, 16)) {
		abort_hibernate(TOI_FAILED_IO, "Failed to read uuid.");
		return -EIO;
	}

	read_if_version(3, dev_t, "uuid dev_t field", return -EIO);

	if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *)&ln, sizeof(int))) {
		abort_hibernate(TOI_FAILED_IO, "Failed to read last mount size.");
		return -EIO;
	}

	last_mount = kzalloc(ln, GFP_KERNEL);

	if (!last_mount)
		return -ENOMEM;

	if (toiActiveAllocator->rw_header_chunk(READ, NULL, last_mount, ln)) {
		abort_hibernate(TOI_FAILED_IO, "Failed to read last mount timestamp.");
		result = -EIO;
		goto out_lmt;
	}

	strncpy((char *)&seek.uuid, uuid, 16);
	seek.dev_t = dev_t;
	seek.last_mount_size = ln;
	seek.last_mount = last_mount;
	dev_t = blk_lookup_fs_info(&seek);
	if (!dev_t)
		goto out_lmt;

	dev = toi_open_by_devnum(dev_t);

	fs_info = fs_info_from_block_dev(dev);
	if (fs_info && !IS_ERR(fs_info)) {
		if (ln != fs_info->last_mount_size) {
			pr_err("Found matching uuid but last mount time lengths differ?! (%d vs %d).\n",
			       ln, fs_info->last_mount_size);
			result = -EINVAL;
		} else {
			char buf[BDEVNAME_SIZE];

			result = !!memcmp(fs_info->last_mount, last_mount, ln);
			if (result)
				pr_err("Last mount time for %s has changed!\n", bdevname(dev, buf));
		}
	}
	toi_close_bdev(dev);
	free_fs_info(fs_info);
out_lmt:
	kfree(last_mount);
	return result;
}

static int fs_info_load_and_check(void)
{
	int to_do, result = 0;

	if (toiActiveAllocator->rw_header_chunk(READ, NULL, (char *)&to_do, sizeof(int))) {
		abort_hibernate(TOI_FAILED_IO, "Failed to read num fs_info to load.");
		return -EIO;
	}

	while (to_do--)
		result |= fs_info_load_and_check_one();

	return result;
}
/**
 * write_image_header - write the image header after writing the image proper
 *
 * Returns: Int
 * Zero on success, error value otherwise.
 **/
int write_image_header(void)
{
	int ret;
	int total = pagedir1.size + pagedir2.size + 2;
	char *header_buffer = NULL;

	/* Now prepare to write the header */
	ret = toiActiveAllocator->write_header_init();
	if (ret) {
		abort_hibernate(TOI_FAILED_MODULE_INIT,
				"Active allocator's write_header_init function failed.");
		goto write_image_header_abort;
	}

	/* Get a buffer */
	header_buffer = (char *)toi_get_zeroed_page(24, TOI_ATOMIC_GFP);
	if (!header_buffer) {
		abort_hibernate(TOI_OUT_OF_MEMORY,
				"Out of memory when trying to get page for header!");
		goto write_image_header_abort;
	}

	/* Write hibernate header */
	if (fill_toi_header((struct toi_header *)header_buffer)) {
		abort_hibernate(TOI_OUT_OF_MEMORY, "Failure to fill header information!");
		goto write_image_header_abort;
	}

	if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
						header_buffer, sizeof(struct toi_header))) {
		abort_hibernate(TOI_OUT_OF_MEMORY, "Failure to write header info.");
		goto write_image_header_abort;
	}

	if (toiActiveAllocator->rw_header_chunk(WRITE, NULL,
						(char *)&toi_max_workers,
						sizeof(toi_max_workers))) {
		abort_hibernate(TOI_OUT_OF_MEMORY, "Failed to write the number of workers to use.");
		goto write_image_header_abort;
	}

	/* Write filesystem info */
	if (fs_info_save())
		goto write_image_header_abort;

	/* Write module configurations */
	ret = write_module_configs();
	if (ret) {
		abort_hibernate(TOI_FAILED_IO, "Failed to write module configs.");
		goto write_image_header_abort;
	}

	if (memory_bm_write(pageset1_map, toiActiveAllocator->rw_header_chunk)) {
		abort_hibernate(TOI_FAILED_IO, "Failed to write bitmaps.");
		goto write_image_header_abort;
	}

	/* Flush data and let allocator cleanup */
	if (toiActiveAllocator->write_header_cleanup()) {
		abort_hibernate(TOI_FAILED_IO, "Failed to cleanup writing header.");
		goto write_image_header_abort_no_cleanup;
	}

	if (test_result_state(TOI_ABORTED))
		goto write_image_header_abort_no_cleanup;

	toi_update_status(total, total, NULL);

out:
	if (header_buffer)
		toi_free_page(24, (unsigned long)header_buffer);
	return ret;

write_image_header_abort:
	toiActiveAllocator->write_header_cleanup();
write_image_header_abort_no_cleanup:
	ret = -1;
	goto out;
}
/**
 * sanity_check - check the header
 * @sh: the header which was saved at hibernate time.
 *
 * Perform a few checks, seeking to ensure that the kernel being
 * booted matches the one hibernated. They need to match so we can
 * be _sure_ things will work. It is not absolutely impossible for
 * resuming from a different kernel to work, just not assured.
 **/
static char *sanity_check(struct toi_header *sh)
{
	char *reason = check_image_kernel((struct swsusp_info *)sh);

	if (reason)
		return reason;

	if (!test_action_state(TOI_IGNORE_ROOTFS)) {
		const struct super_block *sb;

		list_for_each_entry(sb, &super_blocks, s_list) {
			if ((!(sb->s_flags & MS_RDONLY)) &&
			    (sb->s_type->fs_flags & FS_REQUIRES_DEV))
				return "Device backed fs is mounted rw prior to resume or initrd/ramfs is mounted rw";
		}
	}

	return NULL;
}

static DECLARE_WAIT_QUEUE_HEAD(freeze_wait);

#define FREEZE_IN_PROGRESS (~0)

static int freeze_result;

static void do_freeze(struct work_struct *dummy)
{
	freeze_result = freeze_processes();
	wake_up(&freeze_wait);
	trap_non_toi_io = 1;
}

static DECLARE_WORK(freeze_work, do_freeze);
  1240. /**
  1241. * __read_pageset1 - test for the existence of an image and attempt to load it
  1242. *
  1243. * Returns: Int
  1244. * Zero if image found and pageset1 successfully loaded.
  1245. * Error if no image found or loaded.
  1246. **/
  1247. static int __read_pageset1(void)
  1248. {
  1249. int i, result = 0;
  1250. char *header_buffer = (char *)toi_get_zeroed_page(25, TOI_ATOMIC_GFP), *sanity_error = NULL;
  1251. struct toi_header *toi_header;
  1252. if (!header_buffer) {
  1253. pr_warn("Unable to allocate a page for reading the " "signature.\n");
  1254. return -ENOMEM;
  1255. }
  1256. /* Check for an image */
  1257. result = toiActiveAllocator->image_exists(1);
  1258. if (result == 3) {
  1259. result = -ENODATA;
  1260. toi_early_boot_message(1, 0, "The signature from an older version of TuxOnIce has been detected.");
  1261. goto out_remove_image;
  1262. }
  1263. if (result != 1) {
  1264. result = -ENODATA;
  1265. noresume_reset_modules();
  1266. pr_warn("TuxOnIce: No image found.\n");
  1267. goto out;
  1268. }
  1269. /*
  1270. * Prepare the active allocator for reading the image header. The
  1271. * activate allocator might read its own configuration.
  1272. *
  1273. * NB: This call may never return because there might be a signature
  1274. * for a different image such that we warn the user and they choose
  1275. * to reboot. (If the device ids look erroneous (2.4 vs 2.6) or the
  1276. * location of the image might be unavailable if it was stored on a
  1277. * network connection).
  1278. */
  1279. result = toiActiveAllocator->read_header_init();
  1280. if (result) {
  1281. pr_warn("TuxOnIce: Failed to initialise, reading the " "image header.\n");
  1282. goto out_remove_image;
  1283. }
  1284. /* Check for noresume command line option */
  1285. if (test_toi_state(TOI_NORESUME_SPECIFIED)) {
  1286. pr_warn("TuxOnIce: Noresume on command line. Removed " "image.\n");
  1287. goto out_remove_image;
  1288. }
  1289. /* Check whether we've resumed before */
  1290. if (test_toi_state(TOI_RESUMED_BEFORE)) {
  1291. toi_early_boot_message(1, 0, NULL);
  1292. if (!(test_toi_state(TOI_CONTINUE_REQ))) {
  1293. pr_warn("TuxOnIce: Tried to resume before: Invalidated image");
  1294. goto out_remove_image;
  1295. }
  1296. }
  1297. clear_toi_state(TOI_CONTINUE_REQ);
  1298. toi_image_header_version = toiActiveAllocator->get_header_version();
  1299. if (unlikely(toi_image_header_version > TOI_HEADER_VERSION)) {
  1300. toi_early_boot_message(1, 0, image_version_error);
  1301. if (!(test_toi_state(TOI_CONTINUE_REQ))) {
  1302. pr_warn("TuxOnIce: Header version too new: Invalidated image");
  1303. goto out_remove_image;
  1304. }
  1305. }
	/* Read hibernate header */
	result = toiActiveAllocator->rw_header_chunk(READ, NULL,
			header_buffer, sizeof(struct toi_header));
	if (result < 0) {
		pr_err("TuxOnIce: Failed to read the image signature.\n");
		goto out_remove_image;
	}

	toi_header = (struct toi_header *)header_buffer;

	/*
	 * NB: This call may also result in a reboot rather than returning.
	 */
	sanity_error = sanity_check(toi_header);
	if (sanity_error) {
		toi_early_boot_message(1, TOI_CONTINUE_REQ, sanity_error);
		pr_warn("TuxOnIce: Sanity check failed.\n");
		goto out_remove_image;
	}
	/*
	 * We have an image and it looks like it will load okay.
	 *
	 * Get metadata from header. Don't override commandline parameters.
	 *
	 * We don't need to save the image size limit because it's not used
	 * during resume and will be restored with the image anyway.
	 */
	memcpy((char *)&pagedir1, (char *)&toi_header->pagedir, sizeof(pagedir1));

	toi_result = toi_header->param0;
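
	/*
	 * Unless a debug state was already set on this boot, merge the saved
	 * action flags with the current ones: bits covered by
	 * toi_bootflags_mask keep the value set on this boot's command line,
	 * everything else is taken from the image header.
	 */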
	if (!toi_bkd.toi_debug_state) {
		toi_bkd.toi_action =
			(toi_header->param1 & ~toi_bootflags_mask) |
			(toi_bkd.toi_action & toi_bootflags_mask);
		toi_bkd.toi_debug_state = toi_header->param2;
		toi_bkd.toi_default_console_level = toi_header->param3;
	}
	clear_toi_state(TOI_IGNORE_LOGLEVEL);
	pagedir2.size = toi_header->pageset_2_size;

	for (i = 0; i < 4; i++)
		toi_bkd.toi_io_time[i / 2][i % 2] =
			toi_header->io_time[i / 2][i % 2];

	set_toi_state(TOI_BOOT_KERNEL);
	boot_kernel_data_buffer = toi_header->bkd;
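
	/*
	 * read_if_version() presumably only reads this field when the image
	 * header is at least the given version (1 here), so images written by
	 * older kernels that lack toi_max_workers still load; on a read error
	 * it takes the supplied escape path.
	 */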
	read_if_version(1, toi_max_workers, "TuxOnIce max workers", goto out_remove_image);

	/* Read filesystem info */
	if (fs_info_load_and_check()) {
		pr_err("TuxOnIce: File system mount time checks failed. Refusing to corrupt your filesystems.\n");
		goto out_remove_image;
	}

	/* Read module configurations */
	result = read_module_configs();
	if (result) {
		pagedir1.size = 0;
		pagedir2.size = 0;
		pr_warn("TuxOnIce: Failed to read TuxOnIce module configurations.\n");
		clear_action_state(TOI_KEEP_IMAGE);
		goto out_remove_image;
	}
	toi_prepare_console();

	set_toi_state(TOI_NOW_RESUMING);

	result = pm_notifier_call_chain(PM_RESTORE_PREPARE);
	if (result)
		goto out_notifier_call_chain;

	if (usermodehelper_disable())
		goto out_enable_usermodehelper;

	current->flags |= PF_NOFREEZE;
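
	/*
	 * Start freezing userspace asynchronously (see do_freeze() above) so
	 * the freeze can run while we read the image; we wait for it to
	 * complete below, before the original kernel is restored.
	 */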
	freeze_result = FREEZE_IN_PROGRESS;
	schedule_work_on(cpumask_first(cpu_online_mask), &freeze_work);

	toi_cond_pause(1, "About to read original pageset1 locations.");

	/*
	 * See _toi_rw_header_chunk in tuxonice_bio.c:
	 * Initialize pageset1_map by reading the map from the image.
	 */
	if (memory_bm_read(pageset1_map, toiActiveAllocator->rw_header_chunk))
		goto out_thaw;

	/*
	 * See toi_rw_cleanup in tuxonice_bio.c:
	 * Clean up after reading the header.
	 */
	result = toiActiveAllocator->read_header_cleanup();
	if (result) {
		pr_err("TuxOnIce: Failed to cleanup after reading the image header.\n");
		goto out_thaw;
	}

	toi_cond_pause(1, "About to read pagedir.");

	/*
	 * Get the addresses of pages into which we will load the kernel to
	 * be copied back and check if they conflict with the ones we are using.
	 */
	if (toi_get_pageset1_load_addresses()) {
		pr_warn("TuxOnIce: Failed to get load addresses for pageset1.\n");
		goto out_thaw;
	}
	/* Read the original kernel back */
	toi_cond_pause(1, "About to read pageset 1.");

	/* Given the pagemap, read back the data from disk */
	if (read_pageset(&pagedir1, 0)) {
		toi_prepare_status(DONT_CLEAR_BAR, "Failed to read pageset 1.");
		result = -EIO;
		goto out_thaw;
	}

	toi_cond_pause(1, "About to restore original kernel.");
	result = 0;

	if (!test_action_state(TOI_KEEP_IMAGE) &&
	    toiActiveAllocator->mark_resume_attempted)
		toiActiveAllocator->mark_resume_attempted(1);

	wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);

out:
	current->flags &= ~PF_NOFREEZE;
	toi_free_page(25, (unsigned long)header_buffer);
	return result;
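
	/*
	 * Error unwind: each label below undoes the corresponding setup step
	 * above, in reverse order, before reaching the common exit at out:.
	 */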
out_thaw:
	wait_event(freeze_wait, freeze_result != FREEZE_IN_PROGRESS);
	trap_non_toi_io = 0;
	thaw_processes();
out_enable_usermodehelper:
	usermodehelper_enable();
out_notifier_call_chain:
	pm_notifier_call_chain(PM_POST_RESTORE);
	toi_cleanup_console();
out_remove_image:
	result = -EINVAL;
	if (!test_action_state(TOI_KEEP_IMAGE))
		toiActiveAllocator->remove_image();
	toiActiveAllocator->read_header_cleanup();
	noresume_reset_modules();
	goto out;
}
/**
 * read_pageset1 - high-level function to read the saved pages
 *
 * Attempt to read the header and pageset1 of a hibernate image.
 * Handle the outcome, complaining where appropriate.
 **/
int read_pageset1(void)
{
	int error;

	error = __read_pageset1();
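
	/*
	 * -ENODATA (no image found) and -EINVAL (image invalidated or
	 * rejected) are normal boot-time outcomes; anything else is treated
	 * as a genuine resume failure and aborts the cycle.
	 */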
	if (error && error != -ENODATA && error != -EINVAL &&
	    !test_result_state(TOI_ABORTED))
		abort_hibernate(TOI_IMAGE_ERROR, "TuxOnIce: Error %d resuming\n", error);

	return error;
}
/**
 * get_have_image_data - check for an image and report its header details
 **/
static char *get_have_image_data(void)
{
	char *output_buffer = (char *)toi_get_zeroed_page(26, TOI_ATOMIC_GFP);
	struct toi_header *toi_header;

	if (!output_buffer) {
		pr_warn("Output buffer null.\n");
		return NULL;
	}

	/* Check for an image */
	if (!toiActiveAllocator->image_exists(1) ||
	    toiActiveAllocator->read_header_init() ||
	    toiActiveAllocator->rw_header_chunk(READ, NULL,
			output_buffer, sizeof(struct toi_header))) {
		sprintf(output_buffer, "0\n");
		/*
		 * From an initrd/ramfs, catting have_image and
		 * getting a result of 0 is sufficient.
		 */
		clear_toi_state(TOI_BOOT_TIME);
		goto out;
	}

	toi_header = (struct toi_header *)output_buffer;
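
	/*
	 * Success output format: "1\n" followed by the image's uts.machine
	 * and uts.version lines, with an extra "Resumed before.\n" line
	 * appended when a previous resume from this image has been detected.
	 */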
	sprintf(output_buffer, "1\n%s\n%s\n",
		toi_header->uts.machine, toi_header->uts.version);

	/* Check whether we've resumed before */
	if (test_toi_state(TOI_RESUMED_BEFORE))
		strcat(output_buffer, "Resumed before.\n");

out:
	noresume_reset_modules();
	return output_buffer;
}
/**
 * read_pageset2 - read second part of the image
 * @overwrittenpagesonly: Read only pages which would have been
 *	overwritten by pageset1?
 *
 * Read in part or all of pageset2 of an image, depending upon
 * whether we are hibernating and have only overwritten a portion
 * with pageset1 pages, or are resuming and need to read them
 * all.
 *
 * Returns: Int
 *	Zero if no error, otherwise the error value.
 **/
int read_pageset2(int overwrittenpagesonly)
{
	int result = 0;

	if (!pagedir2.size)
		return 0;

	result = read_pageset(&pagedir2, overwrittenpagesonly);

	toi_cond_pause(1, "Pagedir 2 read.");

	return result;
}
/**
 * image_exists_read - has an image been found?
 * @page: Output buffer
 *
 * Store -1, 0 or 1 in page, depending on whether an image is found
 * (-1 when no allocator is available to check). The incoming buffer
 * is PAGE_SIZE and the result is guaranteed to be far less than that,
 * so we don't worry about overflow.
 **/
int image_exists_read(const char *page, int count)
{
	int len = 0;
	char *result;

	if (toi_activate_storage(0))
		return count;

	if (!test_toi_state(TOI_RESUME_DEVICE_OK))
		toi_attempt_to_parse_resume_device(0);
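
	/*
	 * "-1\n" means we have no active allocator to ask; otherwise the
	 * buffer is filled with the "0\n" / "1\n..." report produced by
	 * get_have_image_data() above.
	 */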
	if (!toiActiveAllocator) {
		len = sprintf((char *)page, "-1\n");
	} else {
		result = get_have_image_data();
		if (result) {
			len = sprintf((char *)page, "%s", result);
			toi_free_page(26, (unsigned long)result);
		}
	}

	toi_deactivate_storage(0);

	return len;
}
/**
 * image_exists_write - invalidate an image if one exists
 **/
int image_exists_write(const char *buffer, int count)
{
	if (toi_activate_storage(0))
		return count;

	if (toiActiveAllocator && toiActiveAllocator->image_exists(1))
		toiActiveAllocator->remove_image();

	toi_deactivate_storage(0);

	clear_result_state(TOI_KEPT_IMAGE);

	return count;
}