/*
 * COPYRIGHT:        See COPYRIGHT.TXT
 * PROJECT:          Ext2 File System Driver for WinNT/2K/XP
 * PROGRAMMER:       Matt Wu <mattwu@163.com>
 * HOMEPAGE:         http://www.ext2fsd.com
 */

/* INCLUDES *****************************************************************/

#include <linux/jbd.h>
#include <linux/errno.h>

/* GLOBALS ***************************************************************/

extern PEXT2_GLOBAL Ext2Global;

/* DEFINITIONS *************************************************************/

#pragma alloc_text(PAGE, kzalloc)

struct task_struct current_task = {
    /* comm */          "current\0",
    /* journal_info */  NULL
};

struct task_struct *current = &current_task;

void *kzalloc(int size, int flags)
{
    void *buffer = kmalloc(size, flags);
    if (buffer) {
        memset(buffer, 0, size);
    }
    return buffer;
}

/* kmem_cache_create(): emulate a Linux slab cache with an NT
   non-paged lookaside list */

    kmem_cache_t *kc = NULL;

    kc = kmalloc(sizeof(kmem_cache_t), GFP_KERNEL);
    if (kc) {
        memset(kc, 0, sizeof(kmem_cache_t));
        ExInitializeNPagedLookasideList(/* ... */);
        strncpy(kc->name, name, 31);
        kc->constructor = ctor;
    }

int kmem_cache_destroy(kmem_cache_t *kc)
{
    ExDeleteNPagedLookasideList(&(kc->la));
    kfree(kc);
    return 0;
}

void* kmem_cache_alloc(kmem_cache_t *kc, int flags)
{
    void *ptr = NULL;

    ptr = ExAllocateFromNPagedLookasideList(&(kc->la));
    if (ptr) {
        atomic_inc(&kc->count);
        atomic_inc(&kc->acount);
    }
    return ptr;
}

void kmem_cache_free(kmem_cache_t *kc, void *p)
{
    atomic_dec(&kc->count);
    ExFreeToNPagedLookasideList(&(kc->la), p);
}

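/*
 * Usage sketch for the slab emulation above (illustrative only; the
 * argument order of kmem_cache_create() is assumed to follow the classic
 * Linux API of (name, object size, align, flags, constructor), and
 * `demo_cache` / `obj` are hypothetical names):
 *
 *     kmem_cache_t *demo_cache;
 *     void         *obj;
 *
 *     demo_cache = kmem_cache_create("demo", sizeof(struct buffer_head),
 *                                    0, SLAB_TEMPORARY, NULL);
 *     obj = kmem_cache_alloc(demo_cache, GFP_NOFS);
 *     // ... use obj ...
 *     kmem_cache_free(demo_cache, obj);
 *     kmem_cache_destroy(demo_cache);
 */
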
// wait queue routines

void init_waitqueue_head(wait_queue_head_t *q)
{
    spin_lock_init(&q->lock);
    INIT_LIST_HEAD(&q->task_list);
}

struct __wait_queue *
wait_queue_create()
{
    struct __wait_queue * wait = NULL;
    wait = kmalloc(sizeof(struct __wait_queue), GFP_KERNEL);
    if (!wait) {
        return NULL;
    }

    memset(wait, 0, sizeof(struct __wait_queue));
    wait->flags = WQ_FLAG_AUTO_REMOVAL;
    wait->private = (void *)KeGetCurrentThread();
    INIT_LIST_HEAD(&wait->task_list);
    KeInitializeEvent(&(wait->event),
                      SynchronizationEvent,
                      FALSE);

    return wait;
}

void wait_queue_destroy(struct __wait_queue *wait)
{
    kfree(wait);
}

static inline void __add_wait_queue(wait_queue_head_t *head, struct __wait_queue *new)
{
    list_add(&new->task_list, &head->task_list);
}

/*
 * Used for wake-one threads:
 */
static inline void __add_wait_queue_tail(wait_queue_head_t *head,
                                         struct __wait_queue *new)
{
    list_add_tail(&new->task_list, &head->task_list);
}

static inline void __remove_wait_queue(wait_queue_head_t *head,
                                       struct __wait_queue *old)
{
    list_del(&old->task_list);
}

void add_wait_queue(wait_queue_head_t *q, wait_queue_t *waiti)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    wait->flags &= ~WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    __add_wait_queue(q, wait);
    spin_unlock_irqrestore(&q->lock, flags);
}

void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *waiti)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    wait->flags |= WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    __add_wait_queue_tail(q, wait);
    spin_unlock_irqrestore(&q->lock, flags);
}

void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *waiti)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    spin_lock_irqsave(&q->lock, flags);
    __remove_wait_queue(q, wait);
    spin_unlock_irqrestore(&q->lock, flags);
}

/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 *
 * A usage sketch of the prepare_to_wait()/finish_wait() pattern
 * follows wake_up() below.
 */
void
prepare_to_wait(wait_queue_head_t *q, wait_queue_t *waiti, int state)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    wait->flags &= ~WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    if (list_empty(&wait->task_list))
        __add_wait_queue(q, wait);

    /*
     * don't alter the task state if this is just going to
     * queue an async wait queue callback
     */
    if (is_sync_wait(wait))
        set_current_state(state);
    spin_unlock_irqrestore(&q->lock, flags);
}

void
prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *waiti, int state)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    wait->flags |= WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&q->lock, flags);
    if (list_empty(&wait->task_list))
        __add_wait_queue_tail(q, wait);

    /*
     * don't alter the task state if this is just going to
     * queue an async wait queue callback
     */
    if (is_sync_wait(wait))
        set_current_state(state);
    spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);

void finish_wait(wait_queue_head_t *q, wait_queue_t *waiti)
{
    unsigned long flags;
    struct __wait_queue *wait = *waiti;

    __set_current_state(TASK_RUNNING);
    /*
     * We can check for list emptiness outside the lock
     * IFF:
     *  - we use the "careful" check that verifies both
     *    the next and prev pointers, so that there cannot
     *    be any half-pending updates in progress on other
     *    CPU's that we haven't seen yet (and that might
     *    still change the stack area).
     * and
     *  - all other users take the lock (ie we can only
     *    have _one_ other CPU that looks at or modifies
     *    the list).
     */
    if (!list_empty_careful(&wait->task_list)) {
        spin_lock_irqsave(&q->lock, flags);
        list_del_init(&wait->task_list);
        spin_unlock_irqrestore(&q->lock, flags);
    }

    wait_queue_destroy(wait);
}

int wake_up(wait_queue_head_t *queue)
{
    return 0; /* KeSetEvent(&wait->event, 0, FALSE); */
}

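/*
 * Wait-loop usage sketch (illustrative only; `my_queue`, `my_condition`
 * and the blocking step are assumptions, not code from this file).  It
 * follows the canonical Linux pattern described above prepare_to_wait():
 * queue the waiter first, then test the condition, so a concurrent
 * wake-up cannot be missed.
 *
 *     wait_queue_head_t my_queue;
 *     wait_queue_t      waiter;
 *
 *     init_waitqueue_head(&my_queue);
 *     waiter = wait_queue_create();
 *
 *     prepare_to_wait(&my_queue, &waiter, TASK_UNINTERRUPTIBLE);
 *     while (!my_condition) {
 *         // block on waiter->event (e.g. KeWaitForSingleObject) until
 *         // the producer signals it, then re-check the condition
 *     }
 *     finish_wait(&my_queue, &waiter);   // dequeues and releases the waiter
 */
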
// kernel timer routines

// buffer head routines

struct _EXT2_BUFFER_HEAD {
    kmem_cache_t *  bh_cache;
    atomic_t        bh_count;
    atomic_t        bh_acount;
} g_jbh = {NULL, ATOMIC_INIT(0)};

/* buffer-head cache setup: reset the counters and create the slab cache */

    g_jbh.bh_count.counter = 0;
    g_jbh.bh_acount.counter = 0;
    g_jbh.bh_cache = kmem_cache_create(
                         /* ... */
                         sizeof(struct buffer_head),
                         /* ... */
                         SLAB_TEMPORARY,    /* flags */
                         /* ... */);
    if (g_jbh.bh_cache == NULL) {
        printk(KERN_EMERG "JBD: failed to create handle cache\n");
    }

/* buffer-head cache teardown */

    if (g_jbh.bh_cache) {
        kmem_cache_destroy(g_jbh.bh_cache);
        g_jbh.bh_cache = NULL;
    }

struct buffer_head *
new_buffer_head()
{
    struct buffer_head *bh = NULL;

    bh = kmem_cache_alloc(g_jbh.bh_cache, GFP_NOFS);
    if (bh) {
        memset(bh, 0, sizeof(struct buffer_head));
        DEBUG(DL_BH, ("bh=%p allocated.\n", bh));
        INC_MEM_COUNT(PS_BUFF_HEAD, bh, sizeof(struct buffer_head));
    }

    return bh;
}

void
free_buffer_head(struct buffer_head *bh)
{
    if (bh->b_mdl) {
        DEBUG(DL_BH, ("bh=%p mdl=%p (Flags:%xh VA:%p) released.\n", bh,
                      bh->b_mdl, bh->b_mdl->MdlFlags, bh->b_mdl->MappedSystemVa));
        if (IsFlagOn(bh->b_mdl->MdlFlags, MDL_PAGES_LOCKED)) {
            /* MmUnlockPages will release its VA */
            MmUnlockPages(bh->b_mdl);
        } else if (IsFlagOn(bh->b_mdl->MdlFlags, MDL_MAPPED_TO_SYSTEM_VA)) {
            MmUnmapLockedPages(bh->b_mdl->MappedSystemVa, bh->b_mdl);
        }
        Ext2DestroyMdl(bh->b_mdl);
    }

    DEBUG(DL_BH, ("bh=%p freed.\n", bh));
    DEC_MEM_COUNT(PS_BUFF_HEAD, bh, sizeof(struct buffer_head));
    kmem_cache_free(g_jbh.bh_cache, bh);
}

// Red-black tree insert routine.

static struct buffer_head *__buffer_head_search(struct rb_root *root,
                sector_t blocknr)
{
    struct rb_node *new = root->rb_node;

    /* Figure out where to put new node */
    while (new) {
        struct buffer_head *bh =
            container_of(new, struct buffer_head, b_rb_node);
        s64 result = blocknr - bh->b_blocknr;

        if (result < 0)
            new = new->rb_left;
        else if (result > 0)
            new = new->rb_right;
        else
            return bh;
    }

    return NULL;
}

static int buffer_head_blocknr_cmp(struct rb_node *a, struct rb_node *b)
{
    struct buffer_head *a_bh, *b_bh;
    s64 result;

    a_bh = container_of(a, struct buffer_head, b_rb_node);
    b_bh = container_of(b, struct buffer_head, b_rb_node);
    result = a_bh->b_blocknr - b_bh->b_blocknr;

    if (result < 0)
        return -1;
    else if (result > 0)
        return 1;
    return 0;
}

static struct buffer_head *buffer_head_search(struct block_device *bdev,
                sector_t blocknr)
{
    struct rb_root *root;
    root = &bdev->bd_bh_root;
    return __buffer_head_search(root, blocknr);
}

static void buffer_head_insert(struct block_device *bdev, struct buffer_head *bh)
{
    rb_insert(&bdev->bd_bh_root, &bh->b_rb_node, buffer_head_blocknr_cmp);
}

static void buffer_head_remove(struct block_device *bdev, struct buffer_head *bh)
{
    rb_erase(&bh->b_rb_node, &bdev->bd_bh_root);
}

struct buffer_head *
get_block_bh(
    struct block_device *   bdev,
    sector_t                block,
    unsigned long           size,
    int                     zero)
{
    PEXT2_VCB Vcb = bdev->bd_priv;
    LARGE_INTEGER offset;
    PVOID ptr = NULL;
    KIRQL irql = 0;

    struct list_head *entry;

    /* allocate buffer_head and initialize it */
    struct buffer_head *bh = NULL, *tbh = NULL;

    /* check the block is valid or not */
    if (block >= TOTAL_BLOCKS) {
        /* ... */
    }

    /* search the bdev bh list */
    spin_lock_irqsave(&bdev->bd_bh_lock, irql);
    tbh = buffer_head_search(bdev, block);
    if (tbh) {
        /* ... */
        spin_unlock_irqrestore(&bdev->bd_bh_lock, irql);
        /* ... */
    }
    spin_unlock_irqrestore(&bdev->bd_bh_lock, irql);

    bh = new_buffer_head();
    /* ... */
    bh->b_blocknr = block;
    bh->b_size = size;
    bh->b_bdev = bdev;
    atomic_inc(&g_jbh.bh_count);
    atomic_inc(&g_jbh.bh_acount);

    offset.QuadPart = (s64) bh->b_blocknr;
    offset.QuadPart <<= BLOCK_BITS;

    if (zero) {
        if (!CcPreparePinWrite(Vcb->Volume,
                               /* ... */
                               PIN_WAIT | PIN_EXCLUSIVE,
                               /* ... */)) {
            /* ... */
        }
    } else {
        if (!CcPinRead( Vcb->Volume,
                        /* ... */)) {
            /* ... */
        }
        set_buffer_uptodate(bh);
    }

    bh->b_mdl = Ext2CreateMdl(ptr, TRUE, bh->b_size, IoModifyAccess);

    /* must map the PTE to NonCached zone. journal recovery will
       access the PTE under spinlock: DISPATCH_LEVEL IRQL */
    bh->b_data = MmMapLockedPagesSpecifyCache(
                     bh->b_mdl, KernelMode, MmNonCached,
                     NULL, FALSE, HighPagePriority);

    if (!bh->b_mdl || !bh->b_data) {
        free_buffer_head(bh);
        /* ... */
    }

    DEBUG(DL_BH, ("getblk: Vcb=%p bhcount=%u block=%u bh=%p mdl=%p (Flags:%xh VA:%p)\n",
                  Vcb, atomic_read(&g_jbh.bh_count), block, bh, bh->b_mdl,
                  bh->b_mdl->MdlFlags, bh->b_data));

    spin_lock_irqsave(&bdev->bd_bh_lock, irql);

    /* do search again here */
    tbh = buffer_head_search(bdev, block);
    if (tbh) {
        free_buffer_head(bh);
        /* ... */
        spin_unlock_irqrestore(&bdev->bd_bh_lock, irql);
        /* ... */
    }

    buffer_head_insert(bdev, bh);
    spin_unlock_irqrestore(&bdev->bd_bh_lock, irql);

    return bh;
}

struct buffer_head *
__getblk(
    struct block_device *   bdev,
    sector_t                block,
    unsigned long           size)
{
    return get_block_bh(bdev, block, size, 0);
}

int submit_bh(int rw, struct buffer_head *bh)
{
    struct block_device *bdev = bh->b_bdev;
    PEXT2_VCB            Vcb  = bdev->bd_priv;
    PVOID                Bcb = NULL;
    PVOID                Buffer = NULL;
    LARGE_INTEGER        Offset;

    ASSERT(Vcb->Identifier.Type == EXT2VCB);

    if (IsVcbReadOnly(Vcb)) {
        /* ... */
    }

    SetFlag(Vcb->Volume->Flags, FO_FILE_MODIFIED);
    Offset.QuadPart = ((LONGLONG)bh->b_blocknr) << BLOCK_BITS;
    if (CcPreparePinWrite(
            /* ... */
            PIN_WAIT | PIN_EXCLUSIVE,
            /* ... */)) {

        if (memcmp(Buffer, bh->b_data, BLOCK_SIZE) != 0) {
            memmove(Buffer, bh->b_data, BLOCK_SIZE);
        }
        CcSetDirtyPinnedData(Bcb, NULL);
        Ext2AddBlockExtent( Vcb, NULL,
                            (ULONG)bh->b_blocknr,
                            (ULONG)bh->b_blocknr,
                            (bh->b_size >> BLOCK_BITS));
    } else {
        Ext2AddBlockExtent( Vcb, NULL,
                            (ULONG)bh->b_blocknr,
                            (ULONG)bh->b_blocknr,
                            (bh->b_size >> BLOCK_BITS));
    }

    return 0;
}

void __brelse(struct buffer_head *bh)
{
    struct block_device *bdev = bh->b_bdev;
    PEXT2_VCB Vcb = (PEXT2_VCB)bdev->bd_priv;
    KIRQL irql = 0;

    ASSERT(Vcb->Identifier.Type == EXT2VCB);

    /* write data in case it's dirty */
    while (buffer_dirty(bh)) {
        ll_rw_block(WRITE, 1, &bh);
    }

    spin_lock_irqsave(&bdev->bd_bh_lock, irql);
    if (!atomic_dec_and_test(&bh->b_count)) {
        spin_unlock_irqrestore(&bdev->bd_bh_lock, irql);
        return;
    }
    buffer_head_remove(bdev, bh);
    spin_unlock_irqrestore(&bdev->bd_bh_lock, irql);

    DEBUG(DL_BH, ("brelse: cnt=%u size=%u blk=%10.10xh bh=%p ptr=%p\n",
                  atomic_read(&g_jbh.bh_count) - 1, bh->b_size,
                  bh->b_blocknr, bh, bh->b_data));

    free_buffer_head(bh);
    atomic_dec(&g_jbh.bh_count);
}

void __bforget(struct buffer_head *bh)
{
    clear_buffer_dirty(bh);
    __brelse(bh);
}

void __lock_buffer(struct buffer_head *bh)
{
    /* ... */
}

void unlock_buffer(struct buffer_head *bh)
{
    clear_buffer_locked(bh);
}

void __wait_on_buffer(struct buffer_head *bh)
{
    /* ... */
}

void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
{
    int i;

    for (i = 0; i < nr; i++) {

        struct buffer_head *bh = bhs[i];

        if (rw == SWRITE)
            lock_buffer(bh);
        else if (test_set_buffer_locked(bh))
            continue;

        if (rw == WRITE || rw == SWRITE) {
            if (test_clear_buffer_dirty(bh)) {
                submit_bh(WRITE, bh);
                continue;
            }
        } else {
            if (!buffer_uptodate(bh)) {
                submit_bh(rw, bh);
                continue;
            }
        }
        unlock_buffer(bh);
    }
}

int bh_submit_read(struct buffer_head *bh)
{
    ll_rw_block(READ, 1, &bh);
    return 0;
}

int sync_dirty_buffer(struct buffer_head *bh)
{
    int ret = 0;

    ASSERT(atomic_read(&bh->b_count) <= 1);

    if (test_clear_buffer_dirty(bh)) {
        ret = submit_bh(WRITE, bh);
    }

    return ret;
}

void mark_buffer_dirty(struct buffer_head *bh)
{
    set_buffer_dirty(bh);
}

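/*
 * Write-side sketch (illustrative only; `bh` comes from __getblk() and
 * `src` is a hypothetical source buffer): modify the mapped data, mark
 * the buffer dirty, then flush it explicitly or let __brelse() write it
 * back:
 *
 *     memcpy(bh->b_data, src, bh->b_size);
 *     mark_buffer_dirty(bh);
 *     sync_dirty_buffer(bh);   // clears the dirty bit and submits WRITE
 */
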
int sync_blockdev(struct block_device *bdev)
{
    PEXT2_VCB Vcb = (PEXT2_VCB) bdev->bd_priv;

    if (0 == atomic_read(&g_jbh.bh_count)) {
        Ext2FlushVolume(NULL, Vcb, FALSE);
    }
    return 0;
}

/*
 * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
 * it in the LRU and mark it as accessed.  If it is not present then return
 * NULL.
 */
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, unsigned long size)
{
    return __getblk(bdev, block, size);
}

// inode block mapping

ULONGLONG
bmap(struct inode *i, ULONGLONG b)
{
    struct super_block *s = i->i_sb;

    PEXT2_MCB    Mcb = (PEXT2_MCB)i->i_priv;
    PEXT2_VCB    Vcb = (PEXT2_VCB)s->s_priv;
    PEXT2_EXTENT extent = NULL;
    ULONGLONG    offset = (ULONGLONG)b;
    ULONGLONG    lcn = 0;
    NTSTATUS     status;

    offset <<= BLOCK_BITS;
    status = Ext2BuildExtents(
                 /* ... */);
    if (!NT_SUCCESS(status)) {
        /* ... */
    }

    if (extent == NULL) {
        /* ... */
    }

    lcn = (unsigned long)(extent->Lba >> BLOCK_BITS);

    /* ... */
    Ext2FreeExtent(extent);

    return lcn;
}

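/*
 * bmap() usage sketch (illustrative only; `inode` and `file_block` are
 * assumptions): translate a file-relative block number into a volume
 * block number through the extent list built by Ext2BuildExtents():
 *
 *     ULONGLONG disk_block = bmap(inode, file_block);
 */
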
void iget(struct inode *inode)
{
    atomic_inc(&inode->i_count);
}

void iput(struct inode *inode)
{
    if (atomic_dec_and_test(&inode->i_count)) {
        /* ... free the in-memory inode ... */
    }
}

// initializer and destructor