/*
 * COPYRIGHT:        See COPYRIGHT.TXT
 * PROJECT:          Ext2 File System Driver for WinNT/2K/XP
 * PROGRAMMER:       Matt Wu <mattwu@163.com>
 * HOMEPAGE:         http://www.ext2fsd.com
 */
/* INCLUDES *****************************************************************/

#include <ext2fs.h>
#include <linux/jbd.h>
#include <linux/errno.h>
/* GLOBALS ***************************************************************/

extern PEXT2_GLOBAL Ext2Global;
/* DEFINITIONS *************************************************************/

#pragma alloc_text(PAGE, kzalloc)
struct task_struct current_task = {
    /* pid  */ 0,
    /* tid  */ 1,
    /* comm */ "current\0",
    /* journal_info */ NULL
};

struct task_struct *current = &current_task;
void *kzalloc(int size, int flags)
{
    void *buffer = kmalloc(size, flags);
    if (buffer) {
        memset(buffer, 0, size);
    }
    return buffer;
}
//
// slab routines
//

kmem_cache_t *
kmem_cache_create(
    const char *            name,
    size_t                  size,
    size_t                  offset,
    unsigned long           flags,
    kmem_cache_cb_t         ctor
)
{
    kmem_cache_t *kc = NULL;

    kc = kmalloc(sizeof(kmem_cache_t), GFP_KERNEL);
    if (kc == NULL) {
        goto errorout;
    }

    memset(kc, 0, sizeof(kmem_cache_t));
    ExInitializeNPagedLookasideList(
        &kc->la,        /* lookaside list to initialize */
        NULL,           /* allocate routine: use default */
        NULL,           /* free routine: use default */
        0,              /* flags */
        size,           /* size of each cache entry */
        'bsfE',         /* pool tag (placeholder) */
        0);             /* depth: reserved, must be zero */

    strncpy(kc->name, name, 31);
    kc->constructor = ctor;

errorout:
    return kc;
}
int kmem_cache_destroy(kmem_cache_t * kc)
{
    ASSERT(kc != NULL);

    ExDeleteNPagedLookasideList(&(kc->la));
    kfree(kc);

    return 0;
}
void* kmem_cache_alloc(kmem_cache_t *kc, int flags)
{
    PVOID ptr = NULL;
    ptr = ExAllocateFromNPagedLookasideList(&(kc->la));
    if (ptr) {
        atomic_inc(&kc->count);
        atomic_inc(&kc->acount);
    }
    return ptr;
}
void kmem_cache_free(kmem_cache_t *kc, void *p)
{
    if (p) {
        atomic_dec(&kc->count);
        ExFreeToNPagedLookasideList(&(kc->la), p);
    }
}
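/*
 * Usage sketch (illustrative only; kept out of the build with #if 0):
 * the kmem_cache_* wrappers above emulate the Linux slab interface on
 * top of an NT non-paged lookaside list. The "demo" cache name and the
 * demo_t type below are hypothetical; ext2_init_bh() later in this file
 * shows the real in-tree usage.
 */
#if 0
typedef struct { int value; } demo_t;

static void demo_slab_roundtrip(void)
{
    kmem_cache_t *cache;
    demo_t *d;

    cache = kmem_cache_create("demo", sizeof(demo_t), 0,
                              SLAB_TEMPORARY, NULL);
    if (!cache)
        return;

    d = kmem_cache_alloc(cache, GFP_NOFS);
    if (d) {
        memset(d, 0, sizeof(demo_t)); /* no ctor was given, zero manually */
        d->value = 1;
        kmem_cache_free(cache, d);    /* entry returns to the lookaside list */
    }
    kmem_cache_destroy(cache);        /* tears the lookaside list down */
}
#endif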
//
// wait queue routines
//

void init_waitqueue_head(wait_queue_head_t *q)
{
    spin_lock_init(&q->lock);
    INIT_LIST_HEAD(&q->task_list);
}
struct __wait_queue *
wait_queue_create()
{
    struct __wait_queue * wait = NULL;
    wait = kmalloc(sizeof(struct __wait_queue), GFP_KERNEL);
    if (!wait) {
        return NULL;
    }

    memset(wait, 0, sizeof(struct __wait_queue));
    wait->flags = WQ_FLAG_AUTO_REMOVAL;
    wait->private = (void *)KeGetCurrentThread();
    INIT_LIST_HEAD(&wait->task_list);
    KeInitializeEvent(&(wait->event),
                      SynchronizationEvent,
                      FALSE);

    return wait;
}
void
wait_queue_destroy(struct __wait_queue * wait)
{
    kfree(wait);
}
static inline void __add_wait_queue(wait_queue_head_t *head, struct __wait_queue *new)
{
    list_add(&new->task_list, &head->task_list);
}
/*
 * Used for wake-one threads:
 */
static inline void __add_wait_queue_tail(wait_queue_head_t *head,
        struct __wait_queue *new)
{
    list_add_tail(&new->task_list, &head->task_list);
}
static inline void __remove_wait_queue(wait_queue_head_t *head,
                                       struct __wait_queue *old)
{
    list_del(&old->task_list);
}
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *waiti)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    wait->flags &= ~WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    __add_wait_queue(q, wait);
    spin_unlock_irqrestore(&q->lock, flags);
}
void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *waiti)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    wait->flags |= WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    __add_wait_queue_tail(q, wait);
    spin_unlock_irqrestore(&q->lock, flags);
}
void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *waiti)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    spin_lock_irqsave(&q->lock, flags);
    __remove_wait_queue(q, wait);
    spin_unlock_irqrestore(&q->lock, flags);
}
/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *waiti, int state)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    wait->flags &= ~WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    if (list_empty(&wait->task_list))
        __add_wait_queue(q, wait);
    /*
     * don't alter the task state if this is just going to
     * queue an async wait queue callback
     */
    if (is_sync_wait(wait))
        set_current_state(state);
    spin_unlock_irqrestore(&q->lock, flags);
}
void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *waiti, int state)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    wait->flags |= WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    if (list_empty(&wait->task_list))
        __add_wait_queue_tail(q, wait);
    /*
     * don't alter the task state if this is just going to
     * queue an async wait queue callback
     */
    if (is_sync_wait(wait))
        set_current_state(state);
    spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
void finish_wait(wait_queue_head_t *q, wait_queue_t *waiti)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    __set_current_state(TASK_RUNNING);
    /*
     * We can check for list emptiness outside the lock
     * IFF:
     *  - we use the "careful" check that verifies both
     *    the next and prev pointers, so that there cannot
     *    be any half-pending updates in progress on other
     *    CPU's that we haven't seen yet (and that might
     *    still change the stack area),
     * and
     *  - all other users take the lock (ie we can only
     *    have _one_ other CPU that looks at or modifies
     *    the list).
     */
    if (!list_empty_careful(&wait->task_list)) {
        spin_lock_irqsave(&q->lock, flags);
        list_del_init(&wait->task_list);
        spin_unlock_irqrestore(&q->lock, flags);
    }

    /* free the waiter */
    wait_queue_destroy(wait);
}
int wake_up(wait_queue_head_t *queue)
{
    return 0; /* KeSetEvent(&wait->event, 0, FALSE); */
}
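/*
 * Usage sketch (illustrative only; kept out of the build with #if 0):
 * the canonical wait loop over the emulated API above. As the comment
 * before prepare_to_wait() explains, the waiter is queued before the
 * condition is re-tested, so a concurrent wake_up() cannot be missed.
 * The demo_event_set flag and demo_wqh below are hypothetical.
 */
#if 0
static volatile int demo_event_set = 0;
static wait_queue_head_t demo_wqh;

static void demo_wait_for_event(void)
{
    wait_queue_t wait = wait_queue_create();

    init_waitqueue_head(&demo_wqh);    /* one-time init, done here for brevity */

    while (!demo_event_set) {
        /* queue first, then re-check: the ordering closes the race */
        prepare_to_wait(&demo_wqh, &wait, TASK_UNINTERRUPTIBLE);
        if (demo_event_set)
            break;
        /* a full port would block here, e.g. on wait->event */
    }
    finish_wait(&demo_wqh, &wait);     /* dequeues and frees the waiter */
}
#endif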
//
// kernel timer routines
//

//
// buffer head routines
//

struct _EXT2_BUFFER_HEAD {
    kmem_cache_t *  bh_cache;
    atomic_t        bh_count;
    atomic_t        bh_acount;
} g_jbh = {NULL, ATOMIC_INIT(0), ATOMIC_INIT(0)};
int
ext2_init_bh()
{
    g_jbh.bh_count.counter = 0;
    g_jbh.bh_acount.counter = 0;
    g_jbh.bh_cache = kmem_cache_create(
                         "ext2_bh",          /* name */
                         sizeof(struct buffer_head),
                         0,                  /* offset */
                         SLAB_TEMPORARY,     /* flags */
                         NULL);              /* ctor */
    if (g_jbh.bh_cache == NULL) {
        printk(KERN_EMERG "JBD: failed to create handle cache\n");
        return -ENOMEM;
    }
    return 0;
}

void
ext2_destroy_bh()
{
    if (g_jbh.bh_cache) {
        kmem_cache_destroy(g_jbh.bh_cache);
        g_jbh.bh_cache = NULL;
    }
}
struct buffer_head *
new_buffer_head()
{
    struct buffer_head * bh = NULL;
    bh = kmem_cache_alloc(g_jbh.bh_cache, GFP_NOFS);
    if (bh) {
        atomic_inc(&g_jbh.bh_count);
        atomic_inc(&g_jbh.bh_acount);

        memset(bh, 0, sizeof(struct buffer_head));
        InitializeListHead(&bh->b_link);
        KeQuerySystemTime(&bh->b_ts_creat);
        DEBUG(DL_BH, ("bh=%p allocated.\n", bh));
        INC_MEM_COUNT(PS_BUFF_HEAD, bh, sizeof(struct buffer_head));
    }
    return bh;
}
void
free_buffer_head(struct buffer_head * bh)
{
    if (bh) {
        if (bh->b_mdl) {
            DEBUG(DL_BH, ("bh=%p mdl=%p (Flags:%xh VA:%p) released.\n", bh, bh->b_mdl,
                          bh->b_mdl->MdlFlags, bh->b_mdl->MappedSystemVa));
            if (IsFlagOn(bh->b_mdl->MdlFlags, MDL_MAPPED_TO_SYSTEM_VA)) {
                MmUnmapLockedPages(bh->b_mdl->MappedSystemVa, bh->b_mdl);
            }
            Ext2DestroyMdl(bh->b_mdl);
        }
        if (bh->b_bcb) {
            CcUnpinDataForThread(bh->b_bcb, (ERESOURCE_THREAD)bh | 0x3);
        }

        DEBUG(DL_BH, ("bh=%p freed.\n", bh));
        DEC_MEM_COUNT(PS_BUFF_HEAD, bh, sizeof(struct buffer_head));
        kmem_cache_free(g_jbh.bh_cache, bh);
        atomic_dec(&g_jbh.bh_count);
    }
}
//
// Red-black tree insert routine.
//

static struct buffer_head *__buffer_head_search(struct rb_root *root,
        sector_t blocknr)
{
    struct rb_node *new = root->rb_node;

    /* Figure out where to put new node */
    while (new) {
        struct buffer_head *bh =
            container_of(new, struct buffer_head, b_rb_node);
        s64 result = blocknr - bh->b_blocknr;

        if (result < 0)
            new = new->rb_left;
        else if (result > 0)
            new = new->rb_right;
        else
            return bh;
    }

    return NULL;
}
static int buffer_head_blocknr_cmp(struct rb_node *a, struct rb_node *b)
{
    struct buffer_head *a_bh, *b_bh;
    s64 result;

    a_bh = container_of(a, struct buffer_head, b_rb_node);
    b_bh = container_of(b, struct buffer_head, b_rb_node);
    result = a_bh->b_blocknr - b_bh->b_blocknr;

    if (result < 0)
        return -1;
    if (result > 0)
        return 1;
    return 0;
}
static struct buffer_head *buffer_head_search(struct block_device *bdev,
        sector_t blocknr)
{
    struct rb_root *root;
    root = &bdev->bd_bh_root;
    return __buffer_head_search(root, blocknr);
}
static void buffer_head_insert(struct block_device *bdev, struct buffer_head *bh)
{
    rb_insert(&bdev->bd_bh_root, &bh->b_rb_node, buffer_head_blocknr_cmp);
}
static void buffer_head_remove(struct block_device *bdev, struct buffer_head *bh)
{
    rb_erase(&bh->b_rb_node, &bdev->bd_bh_root);
}
struct buffer_head *
get_block_bh_mdl(
    struct block_device *   bdev,
    sector_t                block,
    unsigned long           size,
    int                     zero
)
{
    PEXT2_VCB Vcb = bdev->bd_priv;
    LARGE_INTEGER offset;
    PVOID         bcb = NULL;
    PVOID         ptr = NULL;

    struct list_head *entry;

    /* allocate buffer_head and initialize it */
    struct buffer_head *bh = NULL, *tbh = NULL;

    /* check whether the block is valid */
    if (block >= TOTAL_BLOCKS) {
        DbgBreak();
        goto errorout;
    }

    /* search the bdev bh list */
    ExAcquireSharedStarveExclusive(&bdev->bd_bh_lock, TRUE);
    tbh = buffer_head_search(bdev, block);
    if (tbh) {
        bh = tbh;
        get_bh(bh);
        ExReleaseResourceLite(&bdev->bd_bh_lock);
        goto errorout;
    }
    ExReleaseResourceLite(&bdev->bd_bh_lock);

    bh = new_buffer_head();
    if (!bh) {
        goto errorout;
    }
    bh->b_bdev = bdev;
    bh->b_blocknr = block;
    bh->b_size = size;
    bh->b_data = NULL;

again:

    offset.QuadPart = (s64) bh->b_blocknr;
    offset.QuadPart <<= BLOCK_BITS;

    if (zero) {
        if (!CcPreparePinWrite(Vcb->Volume,
                               &offset,
                               bh->b_size,
                               FALSE,
                               PIN_WAIT | PIN_EXCLUSIVE,
                               &bcb,
                               &ptr)) {
            Ext2Sleep(100);
            goto again;
        }
    } else {
        if (!CcPinRead( Vcb->Volume,
                        &offset,
                        bh->b_size,
                        PIN_WAIT,
                        &bcb,
                        &ptr)) {
            Ext2Sleep(100);
            goto again;
        }
        set_buffer_uptodate(bh);
    }

    bh->b_mdl = Ext2CreateMdl(ptr, bh->b_size, IoModifyAccess);
    if (bh->b_mdl) {
        /* must map the PTE to NonCached zone. journal recovery will
           access the PTE under spinlock: DISPATCH_LEVEL IRQL */
        bh->b_data = MmMapLockedPagesSpecifyCache(
                         bh->b_mdl, KernelMode, MmNonCached,
                         NULL, FALSE, HighPagePriority);
        /* bh->b_data = MmMapLockedPages(bh->b_mdl, KernelMode); */
    }
    if (!bh->b_mdl || !bh->b_data) {
        free_buffer_head(bh);
        bh = NULL;
        goto errorout;
    }

    /* the MDL keeps the pages locked, so the Bcb is no longer needed */
    if (bcb) {
        CcUnpinData(bcb);
        bcb = NULL;
    }

    DEBUG(DL_BH, ("getblk: Vcb=%p bhcount=%u block=%u bh=%p mdl=%p (Flags:%xh VA:%p)\n",
                  Vcb, atomic_read(&g_jbh.bh_count), block, bh, bh->b_mdl,
                  bh->b_mdl->MdlFlags, bh->b_data));

    ExAcquireResourceExclusiveLite(&bdev->bd_bh_lock, TRUE);
    /* do search again here */
    tbh = buffer_head_search(bdev, block);
    if (tbh) {
        free_buffer_head(bh);
        bh = tbh;
        get_bh(bh);
        RemoveEntryList(&bh->b_link);
        InitializeListHead(&bh->b_link);
        ExReleaseResourceLite(&bdev->bd_bh_lock);
        goto errorout;
    } else {
        get_bh(bh);
        buffer_head_insert(bdev, bh);
    }
    ExReleaseResourceLite(&bdev->bd_bh_lock);

errorout:

    return bh;
}
int submit_bh_mdl(int rw, struct buffer_head *bh)
{
    struct block_device *bdev = bh->b_bdev;
    PEXT2_VCB Vcb = bdev->bd_priv;
    PBCB      Bcb = NULL;
    PVOID     Buffer = NULL;
    LARGE_INTEGER Offset;

    ASSERT(Vcb->Identifier.Type == EXT2VCB);
    ASSERT(bh->b_data);

    if (rw == WRITE) {

        if (IsVcbReadOnly(Vcb)) {
            goto errorout;
        }

        SetFlag(Vcb->Volume->Flags, FO_FILE_MODIFIED);
        Offset.QuadPart = ((LONGLONG)bh->b_blocknr) << BLOCK_BITS;
        if (CcPreparePinWrite(
                    Vcb->Volume,
                    &Offset,
                    BLOCK_SIZE,
                    FALSE,
                    PIN_WAIT | PIN_EXCLUSIVE,
                    &Bcb,
                    &Buffer )) {
#if 0
            if (memcmp(Buffer, bh->b_data, BLOCK_SIZE) != 0) {
                DbgBreak();
            }
            memmove(Buffer, bh->b_data, BLOCK_SIZE);
#endif
            CcSetDirtyPinnedData(Bcb, NULL);
            Ext2AddBlockExtent( Vcb, NULL,
                                (ULONG)bh->b_blocknr,
                                (ULONG)bh->b_blocknr,
                                (bh->b_size >> BLOCK_BITS));
            CcUnpinData(Bcb);
        } else {
            Ext2AddBlockExtent( Vcb, NULL,
                                (ULONG)bh->b_blocknr,
                                (ULONG)bh->b_blocknr,
                                (bh->b_size >> BLOCK_BITS));
        }

    } else {
        /* READ: the data was already pinned in at getblk time */
    }

errorout:

    unlock_buffer(bh);
    put_bh(bh);
    return 0;
}
struct buffer_head *
get_block_bh_pin(
    struct block_device *   bdev,
    sector_t                block,
    unsigned long           size,
    int                     zero
)
{
    PEXT2_VCB Vcb = bdev->bd_priv;
    LARGE_INTEGER offset;

    struct list_head *entry;

    /* allocate buffer_head and initialize it */
    struct buffer_head *bh = NULL, *tbh = NULL;

    /* check whether the block is valid */
    if (block >= TOTAL_BLOCKS) {
        DbgBreak();
        goto errorout;
    }

    /* search the bdev bh list */
    ExAcquireSharedStarveExclusive(&bdev->bd_bh_lock, TRUE);
    tbh = buffer_head_search(bdev, block);
    if (tbh) {
        bh = tbh;
        get_bh(bh);
        ExReleaseResourceLite(&bdev->bd_bh_lock);
        goto errorout;
    }
    ExReleaseResourceLite(&bdev->bd_bh_lock);

    bh = new_buffer_head();
    if (!bh) {
        goto errorout;
    }
    bh->b_bdev = bdev;
    bh->b_blocknr = block;
    bh->b_size = size;
    bh->b_data = NULL;

again:

    offset.QuadPart = (s64) bh->b_blocknr;
    offset.QuadPart <<= BLOCK_BITS;

    if (zero) {
        if (!CcPreparePinWrite(Vcb->Volume,
                               &offset,
                               bh->b_size,
                               FALSE,
                               PIN_WAIT | PIN_EXCLUSIVE,
                               &bh->b_bcb,
                               (PVOID *)&bh->b_data)) {
            Ext2Sleep(100);
            goto again;
        }
    } else {
        if (!CcPinRead( Vcb->Volume,
                        &offset,
                        bh->b_size,
                        PIN_WAIT,
                        &bh->b_bcb,
                        (PVOID *)&bh->b_data)) {
            Ext2Sleep(100);
            goto again;
        }
        set_buffer_uptodate(bh);
    }

    if (bh->b_bcb)
        CcSetBcbOwnerPointer(bh->b_bcb, (PVOID)((ERESOURCE_THREAD)bh | 0x3));

    if (!bh->b_data) {
        free_buffer_head(bh);
        bh = NULL;
        goto errorout;
    }

    DEBUG(DL_BH, ("getblk: Vcb=%p bhcount=%u block=%u bh=%p ptr=%p.\n",
                  Vcb, atomic_read(&g_jbh.bh_count), block, bh, bh->b_data));

    ExAcquireResourceExclusiveLite(&bdev->bd_bh_lock, TRUE);
    /* do search again here */
    tbh = buffer_head_search(bdev, block);
    if (tbh) {
        get_bh(tbh);
        ExReleaseResourceLite(&bdev->bd_bh_lock);
        free_buffer_head(bh);
        bh = tbh;
        RemoveEntryList(&bh->b_link);
        InitializeListHead(&bh->b_link);
        goto errorout;
    }
    get_bh(bh);
    buffer_head_insert(bdev, bh);
    ExReleaseResourceLite(&bdev->bd_bh_lock);

errorout:

    return bh;
}
int submit_bh_pin(int rw, struct buffer_head *bh)
{
    struct block_device *bdev = bh->b_bdev;
    PEXT2_VCB Vcb = bdev->bd_priv;
    LARGE_INTEGER Offset;

    ASSERT(Vcb->Identifier.Type == EXT2VCB);
    ASSERT(bh->b_data && bh->b_bcb);

    if (rw == WRITE) {

        if (IsVcbReadOnly(Vcb)) {
            goto errorout;
        }

        SetFlag(Vcb->Volume->Flags, FO_FILE_MODIFIED);
        Offset.QuadPart = ((LONGLONG)bh->b_blocknr) << BLOCK_BITS;

        CcSetDirtyPinnedData(bh->b_bcb, NULL);
        Ext2AddBlockExtent( Vcb, NULL,
                            (ULONG)bh->b_blocknr,
                            (ULONG)bh->b_blocknr,
                            (bh->b_size >> BLOCK_BITS));
    } else {
        /* READ: the data was already pinned in at getblk time */
    }

errorout:

    unlock_buffer(bh);
    put_bh(bh);
    return 0;
}
/* select the MDL-based or the pin-based buffer-head implementation */
#if defined(EXT2_BH_MDL)   /* guard macro name assumed; original spelling elided */

struct buffer_head *
get_block_bh(
    struct block_device *   bdev,
    sector_t                block,
    unsigned long           size,
    int                     zero
)
{
    return get_block_bh_mdl(bdev, block, size, zero);
}

int submit_bh(int rw, struct buffer_head *bh)
{
    return submit_bh_mdl(rw, bh);
}

#else

struct buffer_head *
get_block_bh(
    struct block_device *   bdev,
    sector_t                block,
    unsigned long           size,
    int                     zero
)
{
    return get_block_bh_pin(bdev, block, size, zero);
}

int submit_bh(int rw, struct buffer_head *bh)
{
    return submit_bh_pin(rw, bh);
}

#endif

struct buffer_head *
__getblk(
    struct block_device *   bdev,
    sector_t                block,
    unsigned long           size
)
{
    return get_block_bh(bdev, block, size, 0);
}
void __brelse(struct buffer_head *bh)
{
    struct block_device *bdev = bh->b_bdev;
    PEXT2_VCB Vcb = (PEXT2_VCB)bdev->bd_priv;

    ASSERT(Vcb->Identifier.Type == EXT2VCB);

    /* write data in case it's dirty */
    while (buffer_dirty(bh)) {
        ll_rw_block(WRITE, 1, &bh);
    }

    if (1 == atomic_read(&bh->b_count)) {
        /* we hold the last reference: drop it below under the lock */
    } else if (atomic_dec_and_test(&bh->b_count)) {
        atomic_inc(&bh->b_count);
    } else {
        return;
    }

    ExAcquireResourceExclusiveLite(&bdev->bd_bh_lock, TRUE);
    if (atomic_dec_and_test(&bh->b_count)) {
        ASSERT(0 == atomic_read(&bh->b_count));
    } else {
        ExReleaseResourceLite(&bdev->bd_bh_lock);
        return;
    }
    buffer_head_remove(bdev, bh);
    KeQuerySystemTime(&bh->b_ts_drop);
    InsertTailList(&Vcb->bd.bd_bh_free, &bh->b_link);
    KeClearEvent(&Vcb->bd.bd_bh_notify);
    ExReleaseResourceLite(&bdev->bd_bh_lock);
    KeSetEvent(&Ext2Global->bhReaper.Wait, 0, FALSE);

    DEBUG(DL_BH, ("brelse: cnt=%u size=%u blk=%10.10xh bh=%p ptr=%p\n",
                  atomic_read(&g_jbh.bh_count) - 1, bh->b_size,
                  bh->b_blocknr, bh, bh->b_data));
}
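/*
 * Design note: __brelse does not tear the buffer down itself. Dropping
 * the last reference moves the bh onto Vcb->bd.bd_bh_free and signals
 * the Ext2Global->bhReaper worker, which performs the actual
 * unmapping/unpinning outside of this possibly hot code path.
 */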
void __bforget(struct buffer_head *bh)
{
    clear_buffer_dirty(bh);
    __brelse(bh);
}
void __lock_buffer(struct buffer_head *bh)
{
    set_buffer_locked(bh);
}

void unlock_buffer(struct buffer_head *bh)
{
    clear_buffer_locked(bh);
}
void __wait_on_buffer(struct buffer_head *bh)
{
    /* I/O completes synchronously in this port: nothing to wait for */
}
void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
{
    int i;

    for (i = 0; i < nr; i++) {

        struct buffer_head *bh = bhs[i];

        if (rw == SWRITE)
            lock_buffer(bh);
        else if (test_set_buffer_locked(bh))
            continue;

        if (rw == WRITE || rw == SWRITE) {
            if (test_clear_buffer_dirty(bh)) {
                get_bh(bh);
                submit_bh(WRITE, bh);
                continue;
            }
        } else {
            if (!buffer_uptodate(bh)) {
                get_bh(bh);
                submit_bh(rw, bh);
                continue;
            }
        }
        unlock_buffer(bh);
    }
}
int bh_submit_read(struct buffer_head *bh)
{
    ll_rw_block(READ, 1, &bh);
    return 0;
}
int sync_dirty_buffer(struct buffer_head *bh)
{
    int ret = 0;

    ASSERT(atomic_read(&bh->b_count) <= 1);
    lock_buffer(bh);
    if (test_clear_buffer_dirty(bh)) {
        get_bh(bh);
        ret = submit_bh(WRITE, bh);
        wait_on_buffer(bh);
    } else {
        unlock_buffer(bh);
    }
    return ret;
}
void mark_buffer_dirty(struct buffer_head *bh)
{
    set_buffer_dirty(bh);
}
int sync_blockdev(struct block_device *bdev)
{
    PEXT2_VCB Vcb = (PEXT2_VCB) bdev->bd_priv;
    Ext2FlushVolume(NULL, Vcb, FALSE);
    return 0;
}
/*
 * Perform a pagecache lookup for the matching buffer. If it's there, refresh
 * it in the LRU and mark it as accessed. If it is not present then return NULL.
 */
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned long size)
{
    return __getblk(bdev, block, size);
}
//
// inode block mapping
//

ULONGLONG bmap(struct inode *i, ULONGLONG b)
{
    ULONGLONG lcn = 0;
    struct super_block *s = i->i_sb;

    PEXT2_MCB    Mcb = (PEXT2_MCB)i->i_priv;
    PEXT2_VCB    Vcb = (PEXT2_VCB)s->s_priv;
    PEXT2_EXTENT extent = NULL;
    ULONGLONG    offset = (ULONGLONG)b;
    NTSTATUS     status;

    if (!Mcb || !Vcb) {
        goto errorout;
    }

    offset <<= BLOCK_BITS;
    status = Ext2BuildExtents(
                 NULL,         /* IrpContext */
                 Vcb,
                 Mcb,
                 offset,
                 BLOCK_SIZE,
                 FALSE,        /* do not allocate new blocks */
                 &extent
             );

    if (!NT_SUCCESS(status)) {
        goto errorout;
    }

    if (extent == NULL) {
        goto errorout;
    }

    lcn = (unsigned long)(extent->Lba >> BLOCK_BITS);

errorout:

    if (extent) {
        Ext2FreeExtent(extent);
    }

    return lcn;
}
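/*
 * Usage sketch (illustrative only; kept out of the build with #if 0):
 * bmap() resolves a logical file block to a volume block number,
 * returning 0 for a hole or on failure. demo_print_mapping is a
 * hypothetical helper; the inode is assumed fully initialized.
 */
#if 0
static void demo_print_mapping(struct inode *inode, ULONGLONG block)
{
    ULONGLONG lcn = bmap(inode, block);

    if (lcn == 0) {
        DEBUG(DL_BH, ("block %I64u: unmapped (hole or error)\n", block));
    } else {
        DEBUG(DL_BH, ("block %I64u -> lcn %I64u\n", block, lcn));
    }
}
#endif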
void iget(struct inode *inode)
{
    atomic_inc(&inode->i_count);
}

void iput(struct inode *inode)
{
    if (atomic_dec_and_test(&inode->i_count)) {
        kfree(inode);
    }
}
//
// initializer and destructor
//

void
ext2_destroy_linux()
{
    ext2_destroy_bh();
}