2 * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project.
4 * Copyright (c) 2001-2003 Anton Altaparmakov
6 * This program/include file is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as published
8 * by the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program/include file is distributed in the hope that it will be
12 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
13 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program (in the main directory of the Linux-NTFS
18 * distribution in the file COPYING); if not, write to the Free Software
19 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include <linux/pagemap.h>
23 #include <linux/buffer_head.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
26 #include <linux/mount.h>
34 * ntfs_attr - ntfs in memory attribute structure
35 * @mft_no: mft record number of the base mft record of this attribute
36 * @name: Unicode name of the attribute (NULL if unnamed)
37 * @name_len: length of @name in Unicode characters (0 if unnamed)
38 * @type: attribute type (see layout.h)
40 * This structure exists only to provide a small structure for the
41 * ntfs_{attr_}iget()/ntfs_test_inode()/ntfs_init_locked_inode() mechanism.
43 * NOTE: Elements are ordered by size to make the structure as compact as
44 * possible on all architectures.
54 * ntfs_test_inode - compare two (possibly fake) inodes for equality
55 * @vi: vfs inode which to test
56 * @na: ntfs attribute which is being tested with
58 * Compare the ntfs attribute embedded in the ntfs specific part of the vfs
59 * inode @vi for equality with the ntfs attribute @na.
61 * If searching for the normal file/directory inode, set @na->type to AT_UNUSED.
62 * @na->name and @na->name_len are then ignored.
64 * Return 1 if the attributes match and 0 if not.
66 * NOTE: This function runs with the inode_lock spin lock held so it is not
67 * allowed to sleep.
69 static int ntfs_test_inode(struct inode
*vi
, ntfs_attr
*na
)
73 if (vi
->i_ino
!= na
->mft_no
)
76 /* If !NInoAttr(ni), @vi is a normal file or directory inode. */
77 if (likely(!NInoAttr(ni
))) {
78 /* If not looking for a normal inode this is a mismatch. */
79 if (unlikely(na
->type
!= AT_UNUSED
))
82 /* A fake inode describing an attribute. */
83 if (ni
->type
!= na
->type
)
85 if (ni
->name_len
!= na
->name_len
)
87 if (na
->name_len
&& memcmp(ni
->name
, na
->name
,
88 na
->name_len
* sizeof(uchar_t
)))
96 * ntfs_init_locked_inode - initialize an inode
97 * @vi: vfs inode to initialize
98 * @na: ntfs attribute which to initialize @vi to
100 * Initialize the vfs inode @vi with the values from the ntfs attribute @na in
101 * order to enable ntfs_test_inode() to do its work.
103 * If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
104 * In that case, @na->name and @na->name_len should be set to NULL and 0,
105 * respectively. Although that is not strictly necessary as
106 * ntfs_read_inode_locked() will fill them in later.
108 * Return 0 on success and -errno on error.
110 * NOTE: This function runs with the inode_lock spin lock held so it is not
111 * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
113 static int ntfs_init_locked_inode(struct inode
*vi
, ntfs_attr
*na
)
115 ntfs_inode
*ni
= NTFS_I(vi
);
117 vi
->i_ino
= na
->mft_no
;
120 if (na
->type
== AT_INDEX_ALLOCATION
)
121 NInoSetMstProtected(ni
);
124 ni
->name_len
= na
->name_len
;
126 /* If initializing a normal inode, we are done. */
127 if (likely(na
->type
== AT_UNUSED
))
130 /* It is a fake inode. */
134 * We have I30 global constant as an optimization as it is the name
135 * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC
136 * allocation but that is ok. And most attributes are unnamed anyway,
137 * thus the fraction of named attributes with name != I30 is actually
140 if (na
->name
&& na
->name_len
&& na
->name
!= I30
) {
143 i
= na
->name_len
* sizeof(uchar_t
);
144 ni
->name
= (uchar_t
*)kmalloc(i
+ sizeof(uchar_t
), GFP_ATOMIC
);
147 memcpy(ni
->name
, na
->name
, i
);
148 ni
->name
[i
] = cpu_to_le16('\0');
153 typedef int (*test_t
)(struct inode
*, void *);
154 typedef int (*set_t
)(struct inode
*, void *);
155 static int ntfs_read_locked_inode(struct inode
*vi
);
156 static int ntfs_read_locked_attr_inode(struct inode
*base_vi
, struct inode
*vi
);
159 * ntfs_iget - obtain a struct inode corresponding to a specific normal inode
160 * @sb: super block of mounted volume
161 * @mft_no: mft record number / inode number to obtain
163 * Obtain the struct inode corresponding to a specific normal inode (i.e. a
164 * file or directory).
166 * If the inode is in the cache, it is just returned with an increased
167 * reference count. Otherwise, a new struct inode is allocated and initialized,
168 * and finally ntfs_read_locked_inode() is called to read in the inode and
169 * fill in the remainder of the inode structure.
171 * Return the struct inode on success. Check the return value with IS_ERR() and
172 * if true, the function failed and the error code is obtained from PTR_ERR().
174 struct inode
*ntfs_iget(struct super_block
*sb
, unsigned long mft_no
)
185 vi
= iget5_locked(sb
, mft_no
, (test_t
)ntfs_test_inode
,
186 (set_t
)ntfs_init_locked_inode
, &na
);
188 return ERR_PTR(-ENOMEM
);
192 /* If this is a freshly allocated inode, need to read it now. */
193 if (vi
->i_state
& I_NEW
) {
194 err
= ntfs_read_locked_inode(vi
);
195 unlock_new_inode(vi
);
198 * There is no point in keeping bad inodes around if the failure was
199 * due to ENOMEM. We want to be able to retry again later.
201 if (err
== -ENOMEM
) {
209 * ntfs_attr_iget - obtain a struct inode corresponding to an attribute
210 * @base_vi: vfs base inode containing the attribute
211 * @type: attribute type
212 * @name: Unicode name of the attribute (NULL if unnamed)
213 * @name_len: length of @name in Unicode characters (0 if unnamed)
215 * Obtain the (fake) struct inode corresponding to the attribute specified by
216 * @type, @name, and @name_len, which is present in the base mft record
217 * specified by the vfs inode @base_vi.
219 * If the attribute inode is in the cache, it is just returned with an
220 * increased reference count. Otherwise, a new struct inode is allocated and
221 * initialized, and finally ntfs_read_locked_attr_inode() is called to read the
222 * attribute and fill in the inode structure.
224 * Return the struct inode of the attribute inode on success. Check the return
225 * value with IS_ERR() and if true, the function failed and the error code is
226 * obtained from PTR_ERR().
228 struct inode
*ntfs_attr_iget(struct inode
*base_vi
, ATTR_TYPES type
,
229 uchar_t
*name
, u32 name_len
)
235 na
.mft_no
= base_vi
->i_ino
;
238 na
.name_len
= name_len
;
240 vi
= iget5_locked(base_vi
->i_sb
, na
.mft_no
, (test_t
)ntfs_test_inode
,
241 (set_t
)ntfs_init_locked_inode
, &na
);
243 return ERR_PTR(-ENOMEM
);
247 /* If this is a freshly allocated inode, need to read it now. */
248 if (vi
->i_state
& I_NEW
) {
249 err
= ntfs_read_locked_attr_inode(base_vi
, vi
);
250 unlock_new_inode(vi
);
253 * There is no point in keeping bad attribute inodes around. This also
254 * simplifies things in that we never need to check for bad attribute
264 struct inode
*ntfs_alloc_big_inode(struct super_block
*sb
)
268 ntfs_debug("Entering.");
269 ni
= (ntfs_inode
*)kmem_cache_alloc(ntfs_big_inode_cache
,
271 if (likely(ni
!= NULL
)) {
275 ntfs_error(sb
, "Allocation of NTFS big inode structure failed.");
279 void ntfs_destroy_big_inode(struct inode
*inode
)
281 ntfs_inode
*ni
= NTFS_I(inode
);
283 ntfs_debug("Entering.");
285 if (!atomic_dec_and_test(&ni
->count
))
287 kmem_cache_free(ntfs_big_inode_cache
, NTFS_I(inode
));
290 static inline ntfs_inode
*ntfs_alloc_extent_inode(void)
294 ntfs_debug("Entering.");
295 ni
= (ntfs_inode
*)kmem_cache_alloc(ntfs_inode_cache
, SLAB_NOFS
);
296 if (likely(ni
!= NULL
)) {
300 ntfs_error(NULL
, "Allocation of NTFS inode structure failed.");
304 void ntfs_destroy_extent_inode(ntfs_inode
*ni
)
306 ntfs_debug("Entering.");
308 if (!atomic_dec_and_test(&ni
->count
))
310 kmem_cache_free(ntfs_inode_cache
, ni
);
314 * __ntfs_init_inode - initialize ntfs specific part of an inode
315 * @sb: super block of mounted volume
316 * @ni: freshly allocated ntfs inode which to initialize
318 * Initialize an ntfs inode to defaults.
320 * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left
321 * untouched. Make sure to initialize them elsewhere.
323 * The function is declared void, so there is no return value to check.
325 static void __ntfs_init_inode(struct super_block
*sb
, ntfs_inode
*ni
)
327 ntfs_debug("Entering.");
328 ni
->initialized_size
= ni
->allocated_size
= 0;
330 atomic_set(&ni
->count
, 1);
331 ni
->vol
= NTFS_SB(sb
);
332 init_run_list(&ni
->run_list
);
333 init_MUTEX(&ni
->mrec_lock
);
336 ni
->attr_list_size
= 0;
337 ni
->attr_list
= NULL
;
338 init_run_list(&ni
->attr_list_rl
);
339 ni
->itype
.index
.bmp_ino
= NULL
;
340 ni
->itype
.index
.block_size
= 0;
341 ni
->itype
.index
.vcn_size
= 0;
342 ni
->itype
.index
.block_size_bits
= 0;
343 ni
->itype
.index
.vcn_size_bits
= 0;
344 init_MUTEX(&ni
->extent_lock
);
346 ni
->ext
.base_ntfs_ino
= NULL
;
350 static inline void ntfs_init_big_inode(struct inode
*vi
)
352 ntfs_inode
*ni
= NTFS_I(vi
);
354 ntfs_debug("Entering.");
355 __ntfs_init_inode(vi
->i_sb
, ni
);
356 ni
->mft_no
= vi
->i_ino
;
360 inline ntfs_inode
*ntfs_new_extent_inode(struct super_block
*sb
,
361 unsigned long mft_no
)
363 ntfs_inode
*ni
= ntfs_alloc_extent_inode();
365 ntfs_debug("Entering.");
366 if (likely(ni
!= NULL
)) {
367 __ntfs_init_inode(sb
, ni
);
369 ni
->type
= AT_UNUSED
;
377 * ntfs_is_extended_system_file - check if a file is in the $Extend directory
378 * @ctx: initialized attribute search context
380 * Search all file name attributes in the inode described by the attribute
381 * search context @ctx and check if any of the names are in the $Extend system
385 * 1: file is in $Extend directory
386 * 0: file is not in $Extend directory
387 * -EIO: file is corrupt
389 static int ntfs_is_extended_system_file(attr_search_context
*ctx
)
393 /* Restart search. */
394 reinit_attr_search_ctx(ctx
);
396 /* Get number of hard links. */
397 nr_links
= le16_to_cpu(ctx
->mrec
->link_count
);
399 /* Loop through all hard links. */
400 while (lookup_attr(AT_FILE_NAME
, NULL
, 0, 0, 0, NULL
, 0, ctx
)) {
401 FILE_NAME_ATTR
*file_name_attr
;
402 ATTR_RECORD
*attr
= ctx
->attr
;
407 * Maximum sanity checking as we are called on an inode that
408 * we suspect might be corrupt.
410 p
= (u8
*)attr
+ le32_to_cpu(attr
->length
);
411 if (p
< (u8
*)ctx
->mrec
|| (u8
*)p
> (u8
*)ctx
->mrec
+
412 le32_to_cpu(ctx
->mrec
->bytes_in_use
)) {
414 ntfs_error(ctx
->ntfs_ino
->vol
->sb
, "Corrupt file name "
415 "attribute. You should run chkdsk.");
418 if (attr
->non_resident
) {
419 ntfs_error(ctx
->ntfs_ino
->vol
->sb
, "Non-resident file "
420 "name. You should run chkdsk.");
424 ntfs_error(ctx
->ntfs_ino
->vol
->sb
, "File name with "
425 "invalid flags. You should run "
429 if (!(attr
->data
.resident
.flags
& RESIDENT_ATTR_IS_INDEXED
)) {
430 ntfs_error(ctx
->ntfs_ino
->vol
->sb
, "Unindexed file "
431 "name. You should run chkdsk.");
434 file_name_attr
= (FILE_NAME_ATTR
*)((u8
*)attr
+
435 le16_to_cpu(attr
->data
.resident
.value_offset
));
436 p2
= (u8
*)attr
+ le32_to_cpu(attr
->data
.resident
.value_length
);
437 if (p2
< (u8
*)attr
|| p2
> p
)
438 goto err_corrupt_attr
;
439 /* This attribute is ok, but is it in the $Extend directory? */
440 if (MREF_LE(file_name_attr
->parent_directory
) == FILE_Extend
)
441 return 1; /* YES, it's an extended system file. */
444 ntfs_error(ctx
->ntfs_ino
->vol
->sb
, "Inode hard link count "
445 "doesn't match number of name attributes. You "
446 "should run chkdsk.");
449 return 0; /* NO, it is not an extended system file. */
453 * ntfs_read_locked_inode - read an inode from its device
456 * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode
457 * described by @vi into memory from the device.
459 * The only fields in @vi that we need to/can look at when the function is
460 * called are i_sb, pointing to the mounted device's super block, and i_ino,
461 * the number of the inode to load. If this is a fake inode, i.e. NInoAttr(),
462 * then the fields type, name, and name_len are also valid, and describe the
463 * attribute which this fake inode represents.
465 * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino
466 * for reading and sets up the necessary @vi fields as well as initializing
469 * Q: What locks are held when the function is called?
470 * A: i_state has I_LOCK set, hence the inode is locked, also
471 * i_count is set to 1, so it is not going to go away
472 * i_flags is set to 0 and we have no business touching it. Only an ioctl()
473 * is allowed to write to them. We should of course be honouring them but
474 * we need to do that using the IS_* macros defined in include/linux/fs.h.
475 * In any case ntfs_read_locked_inode() has nothing to do with i_flags.
477 * Return 0 on success and -errno on error. In the error case, the inode will
478 * have had make_bad_inode() executed on it.
480 static int ntfs_read_locked_inode(struct inode
*vi
)
482 ntfs_volume
*vol
= NTFS_SB(vi
->i_sb
);
485 STANDARD_INFORMATION
*si
;
486 attr_search_context
*ctx
;
489 ntfs_debug("Entering for i_ino 0x%lx.", vi
->i_ino
);
491 /* Setup the generic vfs inode parts now. */
493 /* This is the optimal IO size (for stat), not the fs block size. */
494 vi
->i_blksize
= PAGE_CACHE_SIZE
;
496 * This is for checking whether an inode has changed w.r.t. a file so
497 * that the file can be updated if necessary (compare with f_version).
501 vi
->i_uid
= vol
->uid
;
502 vi
->i_gid
= vol
->gid
;
506 * Initialize the ntfs specific part of @vi special casing
507 * FILE_MFT which we need to do at mount time.
509 if (vi
->i_ino
!= FILE_MFT
)
510 ntfs_init_big_inode(vi
);
513 m
= map_mft_record(ni
);
518 ctx
= get_attr_search_ctx(ni
, m
);
524 if (!(m
->flags
& MFT_RECORD_IN_USE
)) {
525 ntfs_error(vi
->i_sb
, "Inode is not in use! You should "
529 if (m
->base_mft_record
) {
530 ntfs_error(vi
->i_sb
, "Inode is an extent inode! You should "
535 /* Transfer information from mft record into vfs and ntfs inodes. */
536 ni
->seq_no
= le16_to_cpu(m
->sequence_number
);
539 * FIXME: Keep in mind that link_count is two for files which have both
540 * a long file name and a short file name as separate entries, so if
541 * we are hiding short file names this will be too high. Either we need
542 * to account for the short file names by subtracting them or we need
543 * to make sure we delete files even though i_nlink is not zero which
544 * might be tricky due to vfs interactions. Need to think about this
545 * some more when implementing the unlink command.
547 vi
->i_nlink
= le16_to_cpu(m
->link_count
);
549 * FIXME: Reparse points can have the directory bit set even though
550 * they would be S_IFLNK. Need to deal with this further below when we
551 * implement reparse points / symbolic links but it will do for now.
552 * Also if not a directory, it could be something else, rather than
553 * a regular file. But again, will do for now.
555 if (m
->flags
& MFT_RECORD_IS_DIRECTORY
) {
556 vi
->i_mode
|= S_IFDIR
;
557 /* Things break without this kludge! */
561 vi
->i_mode
|= S_IFREG
;
564 * Find the standard information attribute in the mft record. At this
565 * stage we haven't setup the attribute list stuff yet, so this could
566 * in fact fail if the standard information is in an extent record, but
567 * I don't think this actually ever happens.
569 if (!lookup_attr(AT_STANDARD_INFORMATION
, NULL
, 0, 0, 0, NULL
, 0,
572 * TODO: We should be performing a hot fix here (if the recover
573 * mount option is set) by creating a new attribute.
575 ntfs_error(vi
->i_sb
, "$STANDARD_INFORMATION attribute is "
579 /* Get the standard information attribute value. */
580 si
= (STANDARD_INFORMATION
*)((char*)ctx
->attr
+
581 le16_to_cpu(ctx
->attr
->data
.resident
.value_offset
));
583 /* Transfer information from the standard information into vfs_ino. */
585 * Note: The i_?times do not quite map perfectly onto the NTFS times,
586 * but they are close enough, and in the end it doesn't really matter
590 * mtime is the last change of the data within the file. Not changed
591 * when only metadata is changed, e.g. a rename doesn't affect mtime.
593 vi
->i_mtime
.tv_sec
= ntfs2utc(si
->last_data_change_time
);
594 vi
->i_mtime
.tv_nsec
= 0;
596 * ctime is the last change of the metadata of the file. This obviously
597 * always changes, when mtime is changed. ctime can be changed on its
598 * own, mtime is then not changed, e.g. when a file is renamed.
600 vi
->i_ctime
.tv_sec
= ntfs2utc(si
->last_mft_change_time
);
601 vi
->i_ctime
.tv_nsec
= 0;
603 * Last access to the data within the file. Not changed during a rename
604 * for example but changed whenever the file is written to.
606 vi
->i_atime
.tv_sec
= ntfs2utc(si
->last_access_time
);
607 vi
->i_atime
.tv_nsec
= 0;
609 /* Find the attribute list attribute if present. */
610 reinit_attr_search_ctx(ctx
);
611 if (lookup_attr(AT_ATTRIBUTE_LIST
, NULL
, 0, 0, 0, NULL
, 0, ctx
)) {
612 if (vi
->i_ino
== FILE_MFT
)
613 goto skip_attr_list_load
;
614 ntfs_debug("Attribute list found in inode 0x%lx.", vi
->i_ino
);
616 if (ctx
->attr
->flags
& ATTR_IS_ENCRYPTED
||
617 ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
||
618 ctx
->attr
->flags
& ATTR_IS_SPARSE
) {
619 ntfs_error(vi
->i_sb
, "Attribute list attribute is "
620 "compressed/encrypted/sparse. Not "
621 "allowed. Corrupt inode. You should "
625 /* Now allocate memory for the attribute list. */
626 ni
->attr_list_size
= (u32
)attribute_value_length(ctx
->attr
);
627 ni
->attr_list
= ntfs_malloc_nofs(ni
->attr_list_size
);
628 if (!ni
->attr_list
) {
629 ntfs_error(vi
->i_sb
, "Not enough memory to allocate "
630 "buffer for attribute list.");
634 if (ctx
->attr
->non_resident
) {
635 NInoSetAttrListNonResident(ni
);
636 if (ctx
->attr
->data
.non_resident
.lowest_vcn
) {
637 ntfs_error(vi
->i_sb
, "Attribute list has non "
638 "zero lowest_vcn. Inode is "
639 "corrupt. You should run "
644 * Setup the run list. No need for locking as we have
645 * exclusive access to the inode at this time.
647 ni
->attr_list_rl
.rl
= decompress_mapping_pairs(vol
,
649 if (IS_ERR(ni
->attr_list_rl
.rl
)) {
650 err
= PTR_ERR(ni
->attr_list_rl
.rl
);
651 ni
->attr_list_rl
.rl
= NULL
;
652 ntfs_error(vi
->i_sb
, "Mapping pairs "
653 "decompression failed with "
654 "error code %i. Corrupt "
655 "attribute list in inode.",
659 /* Now load the attribute list. */
660 if ((err
= load_attribute_list(vol
, &ni
->attr_list_rl
,
661 ni
->attr_list
, ni
->attr_list_size
,
662 sle64_to_cpu(ctx
->attr
->data
.
663 non_resident
.initialized_size
)))) {
664 ntfs_error(vi
->i_sb
, "Failed to load "
665 "attribute list attribute.");
668 } else /* if (!ctx.attr->non_resident) */ {
669 if ((u8
*)ctx
->attr
+ le16_to_cpu(
670 ctx
->attr
->data
.resident
.value_offset
) +
672 ctx
->attr
->data
.resident
.value_length
) >
673 (u8
*)ctx
->mrec
+ vol
->mft_record_size
) {
674 ntfs_error(vi
->i_sb
, "Corrupt attribute list "
678 /* Now copy the attribute list. */
679 memcpy(ni
->attr_list
, (u8
*)ctx
->attr
+ le16_to_cpu(
680 ctx
->attr
->data
.resident
.value_offset
),
682 ctx
->attr
->data
.resident
.value_length
));
687 * If an attribute list is present we now have the attribute list value
688 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
690 if (S_ISDIR(vi
->i_mode
)) {
694 char *ir_end
, *index_end
;
696 /* It is a directory, find index root attribute. */
697 reinit_attr_search_ctx(ctx
);
698 if (!lookup_attr(AT_INDEX_ROOT
, I30
, 4, CASE_SENSITIVE
, 0,
700 // FIXME: File is corrupt! Hot-fix with empty index
701 // root attribute if recovery option is set.
702 ntfs_error(vi
->i_sb
, "$INDEX_ROOT attribute is "
706 /* Set up the state. */
707 if (ctx
->attr
->non_resident
) {
708 ntfs_error(vi
->i_sb
, "$INDEX_ROOT attribute is "
709 "not resident. Not allowed.");
713 * Compressed/encrypted index root just means that the newly
714 * created files in that directory should be created compressed/
715 * encrypted. However index root cannot be both compressed and
718 if (ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
)
719 NInoSetCompressed(ni
);
720 if (ctx
->attr
->flags
& ATTR_IS_ENCRYPTED
) {
721 if (ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
) {
722 ntfs_error(vi
->i_sb
, "Found encrypted and "
723 "compressed attribute. Not "
727 NInoSetEncrypted(ni
);
729 if (ctx
->attr
->flags
& ATTR_IS_SPARSE
)
731 ir
= (INDEX_ROOT
*)((char*)ctx
->attr
+ le16_to_cpu(
732 ctx
->attr
->data
.resident
.value_offset
));
733 ir_end
= (char*)ir
+ le32_to_cpu(
734 ctx
->attr
->data
.resident
.value_length
);
735 if (ir_end
> (char*)ctx
->mrec
+ vol
->mft_record_size
) {
736 ntfs_error(vi
->i_sb
, "$INDEX_ROOT attribute is "
740 index_end
= (char*)&ir
->index
+
741 le32_to_cpu(ir
->index
.index_length
);
742 if (index_end
> ir_end
) {
743 ntfs_error(vi
->i_sb
, "Directory index is corrupt.");
746 if (ir
->type
!= AT_FILE_NAME
) {
747 ntfs_error(vi
->i_sb
, "Indexed attribute is not "
748 "$FILE_NAME. Not allowed.");
751 if (ir
->collation_rule
!= COLLATION_FILE_NAME
) {
752 ntfs_error(vi
->i_sb
, "Index collation rule is not "
753 "COLLATION_FILE_NAME. Not allowed.");
756 ni
->itype
.index
.block_size
= le32_to_cpu(ir
->index_block_size
);
757 if (ni
->itype
.index
.block_size
&
758 (ni
->itype
.index
.block_size
- 1)) {
759 ntfs_error(vi
->i_sb
, "Index block size (%u) is not a "
761 ni
->itype
.index
.block_size
);
764 if (ni
->itype
.index
.block_size
> PAGE_CACHE_SIZE
) {
765 ntfs_error(vi
->i_sb
, "Index block size (%u) > "
766 "PAGE_CACHE_SIZE (%ld) is not "
768 ni
->itype
.index
.block_size
,
773 if (ni
->itype
.index
.block_size
< NTFS_BLOCK_SIZE
) {
774 ntfs_error(vi
->i_sb
, "Index block size (%u) < "
775 "NTFS_BLOCK_SIZE (%i) is not "
777 ni
->itype
.index
.block_size
,
782 ni
->itype
.index
.block_size_bits
=
783 ffs(ni
->itype
.index
.block_size
) - 1;
784 /* Determine the size of a vcn in the directory index. */
785 if (vol
->cluster_size
<= ni
->itype
.index
.block_size
) {
786 ni
->itype
.index
.vcn_size
= vol
->cluster_size
;
787 ni
->itype
.index
.vcn_size_bits
= vol
->cluster_size_bits
;
789 ni
->itype
.index
.vcn_size
= vol
->sector_size
;
790 ni
->itype
.index
.vcn_size_bits
= vol
->sector_size_bits
;
793 /* Setup the index allocation attribute, even if not present. */
794 NInoSetMstProtected(ni
);
795 ni
->type
= AT_INDEX_ALLOCATION
;
799 if (!(ir
->index
.flags
& LARGE_INDEX
)) {
800 /* No index allocation. */
801 vi
->i_size
= ni
->initialized_size
=
802 ni
->allocated_size
= 0;
803 /* We are done with the mft record, so we release it. */
804 put_attr_search_ctx(ctx
);
805 unmap_mft_record(ni
);
808 goto skip_large_dir_stuff
;
809 } /* LARGE_INDEX: Index allocation present. Setup state. */
810 NInoSetIndexAllocPresent(ni
);
811 /* Find index allocation attribute. */
812 reinit_attr_search_ctx(ctx
);
813 if (!lookup_attr(AT_INDEX_ALLOCATION
, I30
, 4, CASE_SENSITIVE
,
815 ntfs_error(vi
->i_sb
, "$INDEX_ALLOCATION attribute "
816 "is not present but $INDEX_ROOT "
820 if (!ctx
->attr
->non_resident
) {
821 ntfs_error(vi
->i_sb
, "$INDEX_ALLOCATION attribute "
825 if (ctx
->attr
->flags
& ATTR_IS_ENCRYPTED
) {
826 ntfs_error(vi
->i_sb
, "$INDEX_ALLOCATION attribute "
830 if (ctx
->attr
->flags
& ATTR_IS_SPARSE
) {
831 ntfs_error(vi
->i_sb
, "$INDEX_ALLOCATION attribute "
835 if (ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
) {
836 ntfs_error(vi
->i_sb
, "$INDEX_ALLOCATION attribute "
840 if (ctx
->attr
->data
.non_resident
.lowest_vcn
) {
841 ntfs_error(vi
->i_sb
, "First extent of "
842 "$INDEX_ALLOCATION attribute has non "
843 "zero lowest_vcn. Inode is corrupt. "
844 "You should run chkdsk.");
847 vi
->i_size
= sle64_to_cpu(
848 ctx
->attr
->data
.non_resident
.data_size
);
849 ni
->initialized_size
= sle64_to_cpu(
850 ctx
->attr
->data
.non_resident
.initialized_size
);
851 ni
->allocated_size
= sle64_to_cpu(
852 ctx
->attr
->data
.non_resident
.allocated_size
);
854 * We are done with the mft record, so we release it. Otherwise
855 * we would deadlock in ntfs_attr_iget().
857 put_attr_search_ctx(ctx
);
858 unmap_mft_record(ni
);
861 /* Get the index bitmap attribute inode. */
862 bvi
= ntfs_attr_iget(vi
, AT_BITMAP
, I30
, 4);
863 if (unlikely(IS_ERR(bvi
))) {
864 ntfs_error(vi
->i_sb
, "Failed to get bitmap attribute.");
868 ni
->itype
.index
.bmp_ino
= bvi
;
870 if (NInoCompressed(bni
) || NInoEncrypted(bni
) ||
872 ntfs_error(vi
->i_sb
, "$BITMAP attribute is compressed "
873 "and/or encrypted and/or sparse.");
876 /* Consistency check bitmap size vs. index allocation size. */
877 if ((bvi
->i_size
<< 3) < (vi
->i_size
>>
878 ni
->itype
.index
.block_size_bits
)) {
879 ntfs_error(vi
->i_sb
, "Index bitmap too small (0x%Lx) "
880 "for index allocation (0x%Lx).",
881 bvi
->i_size
<< 3, vi
->i_size
);
884 skip_large_dir_stuff
:
885 /* Everyone gets read and scan permissions. */
886 vi
->i_mode
|= S_IRUGO
| S_IXUGO
;
887 /* If not read-only, set write permissions. */
889 vi
->i_mode
|= S_IWUGO
;
891 * Apply the directory permissions mask set in the mount
894 vi
->i_mode
&= ~vol
->dmask
;
895 /* Setup the operations for this inode. */
896 vi
->i_op
= &ntfs_dir_inode_ops
;
897 vi
->i_fop
= &ntfs_dir_ops
;
898 vi
->i_mapping
->a_ops
= &ntfs_aops
;
901 reinit_attr_search_ctx(ctx
);
903 /* Setup the data attribute, even if not present. */
908 /* Find first extent of the unnamed data attribute. */
909 if (!lookup_attr(AT_DATA
, NULL
, 0, 0, 0, NULL
, 0, ctx
)) {
910 vi
->i_size
= ni
->initialized_size
=
911 ni
->allocated_size
= 0LL;
913 * FILE_Secure does not have an unnamed $DATA
914 * attribute, so we special case it here.
916 if (vi
->i_ino
== FILE_Secure
)
917 goto no_data_attr_special_case
;
919 * Most if not all the system files in the $Extend
920 * system directory do not have unnamed data
921 * attributes so we need to check if the parent
922 * directory of the file is FILE_Extend and if it is
923 * ignore this error. To do this we need to get the
924 * name of this inode from the mft record as the name
925 * contains the back reference to the parent directory.
927 if (ntfs_is_extended_system_file(ctx
) > 0)
928 goto no_data_attr_special_case
;
929 // FIXME: File is corrupt! Hot-fix with empty data
930 // attribute if recovery option is set.
931 ntfs_error(vi
->i_sb
, "$DATA attribute is "
935 /* Setup the state. */
936 if (ctx
->attr
->non_resident
) {
937 NInoSetNonResident(ni
);
938 if (ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
) {
939 NInoSetCompressed(ni
);
940 if (vol
->cluster_size
> 4096) {
941 ntfs_error(vi
->i_sb
, "Found "
942 "compressed data but "
943 "compression is disabled due "
944 "to cluster size (%i) > 4kiB.",
948 if ((ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
)
949 != ATTR_IS_COMPRESSED
) {
950 ntfs_error(vi
->i_sb
, "Found "
951 "unknown compression method or "
955 ni
->itype
.compressed
.block_clusters
= 1U <<
956 ctx
->attr
->data
.non_resident
.
958 if (ctx
->attr
->data
.non_resident
.
959 compression_unit
!= 4) {
960 ntfs_error(vi
->i_sb
, "Found "
961 "nonstandard compression unit "
962 "(%u instead of 4). Cannot "
963 "handle this. This might "
964 "indicate corruption so you "
965 "should run chkdsk.",
966 ctx
->attr
->data
.non_resident
.
971 ni
->itype
.compressed
.block_size
= 1U << (
972 ctx
->attr
->data
.non_resident
.
974 vol
->cluster_size_bits
);
975 ni
->itype
.compressed
.block_size_bits
= ffs(
976 ni
->itype
.compressed
.block_size
) - 1;
978 if (ctx
->attr
->flags
& ATTR_IS_ENCRYPTED
) {
979 if (ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
) {
980 ntfs_error(vi
->i_sb
, "Found encrypted "
981 "and compressed data.");
984 NInoSetEncrypted(ni
);
986 if (ctx
->attr
->flags
& ATTR_IS_SPARSE
)
988 if (ctx
->attr
->data
.non_resident
.lowest_vcn
) {
989 ntfs_error(vi
->i_sb
, "First extent of $DATA "
990 "attribute has non zero "
991 "lowest_vcn. Inode is corrupt. "
992 "You should run chkdsk.");
995 /* Setup all the sizes. */
996 vi
->i_size
= sle64_to_cpu(
997 ctx
->attr
->data
.non_resident
.data_size
);
998 ni
->initialized_size
= sle64_to_cpu(
999 ctx
->attr
->data
.non_resident
.
1001 ni
->allocated_size
= sle64_to_cpu(
1002 ctx
->attr
->data
.non_resident
.
1004 if (NInoCompressed(ni
)) {
1005 ni
->itype
.compressed
.size
= sle64_to_cpu(
1006 ctx
->attr
->data
.non_resident
.
1009 } else { /* Resident attribute. */
1011 * Make all sizes equal for simplicity in read code
1012 * paths. FIXME: Need to keep this in mind when
1013 * converting to non-resident attribute in write code
1014 * path. (Probably only affects truncate().)
1016 vi
->i_size
= ni
->initialized_size
= ni
->allocated_size
=
1018 ctx
->attr
->data
.resident
.value_length
);
1020 no_data_attr_special_case
:
1021 /* We are done with the mft record, so we release it. */
1022 put_attr_search_ctx(ctx
);
1023 unmap_mft_record(ni
);
1026 /* Everyone gets all permissions. */
1027 vi
->i_mode
|= S_IRWXUGO
;
1028 /* If read-only, no one gets write permissions. */
1030 vi
->i_mode
&= ~S_IWUGO
;
1031 /* Apply the file permissions mask set in the mount options. */
1032 vi
->i_mode
&= ~vol
->fmask
;
1033 /* Setup the operations for this inode. */
1034 vi
->i_op
= &ntfs_file_inode_ops
;
1035 vi
->i_fop
= &ntfs_file_ops
;
1036 vi
->i_mapping
->a_ops
= &ntfs_aops
;
1039 * The number of 512-byte blocks used on disk (for stat). This is in so
1040 * far inaccurate as it doesn't account for any named streams or other
1041 * special non-resident attributes, but that is how Windows works, too,
1042 * so we are at least consistent with Windows, if not entirely
1043 * consistent with the Linux Way. Doing it the Linux Way would cause a
1044 * significant slowdown as it would involve iterating over all
1045 * attributes in the mft record and adding the allocated/compressed
1046 * sizes of all non-resident attributes present to give us the Linux
1047 * correct size that should go into i_blocks (after division by 512).
1049 if (!NInoCompressed(ni
))
1050 vi
->i_blocks
= ni
->allocated_size
>> 9;
1052 vi
->i_blocks
= ni
->itype
.compressed
.size
>> 9;
1054 ntfs_debug("Done.");
1061 put_attr_search_ctx(ctx
);
1063 unmap_mft_record(ni
);
1065 ntfs_error(vi
->i_sb
, "Failed with error code %i. Marking inode 0x%lx "
1066 "as bad.", -err
, vi
->i_ino
);
1072 * ntfs_read_locked_attr_inode - read an attribute inode from its base inode
1073 * @base_vi: base inode
1074 * @vi: attribute inode to read
1076 * ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read
1077 * the attribute inode described by @vi into memory from the base mft record
1078 * described by @base_vi.
1080 * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for
1081 * reading and looks up the attribute described by @vi before setting up the
1082 * necessary fields in @vi as well as initializing the ntfs inode.
1084 * Q: What locks are held when the function is called?
1085 * A: i_state has I_LOCK set, hence the inode is locked, also
1086 * i_count is set to 1, so it is not going to go away
1088 static int ntfs_read_locked_attr_inode(struct inode
*base_vi
, struct inode
*vi
)
1090 ntfs_volume
*vol
= NTFS_SB(vi
->i_sb
);
1091 ntfs_inode
*ni
, *base_ni
;
1093 attr_search_context
*ctx
;
1096 ntfs_debug("Entering for i_ino 0x%lx.", vi
->i_ino
);
1098 ntfs_init_big_inode(vi
);
1101 base_ni
= NTFS_I(base_vi
);
1103 /* Just mirror the values from the base inode. */
1104 vi
->i_blksize
= base_vi
->i_blksize
;
1105 vi
->i_version
= base_vi
->i_version
;
1106 vi
->i_uid
= base_vi
->i_uid
;
1107 vi
->i_gid
= base_vi
->i_gid
;
1108 vi
->i_nlink
= base_vi
->i_nlink
;
1109 vi
->i_mtime
= base_vi
->i_mtime
;
1110 vi
->i_ctime
= base_vi
->i_ctime
;
1111 vi
->i_atime
= base_vi
->i_atime
;
1112 ni
->seq_no
= base_ni
->seq_no
;
1114 /* Set inode type to zero but preserve permissions. */
1115 vi
->i_mode
= base_vi
->i_mode
& ~S_IFMT
;
1117 m
= map_mft_record(base_ni
);
1122 ctx
= get_attr_search_ctx(base_ni
, m
);
1128 /* Find the attribute. */
1129 if (!lookup_attr(ni
->type
, ni
->name
, ni
->name_len
, IGNORE_CASE
, 0,
1133 if (!ctx
->attr
->non_resident
) {
1134 if (NInoMstProtected(ni
) || ctx
->attr
->flags
) {
1135 ntfs_error(vi
->i_sb
, "Found mst protected attribute "
1136 "or attribute with non-zero flags but "
1137 "the attribute is resident (mft_no "
1138 "0x%lx, type 0x%x, name_len %i). "
1139 "Please report you saw this message "
1140 "to linux-ntfs-dev@lists.sf.net",
1141 vi
->i_ino
, ni
->type
, ni
->name_len
);
1145 * Resident attribute. Make all sizes equal for simplicity in
1148 vi
->i_size
= ni
->initialized_size
= ni
->allocated_size
=
1149 le32_to_cpu(ctx
->attr
->data
.resident
.value_length
);
1151 NInoSetNonResident(ni
);
1152 if (ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
) {
1153 if (NInoMstProtected(ni
)) {
1154 ntfs_error(vi
->i_sb
, "Found mst protected "
1155 "attribute but the attribute "
1156 "is compressed (mft_no 0x%lx, "
1157 "type 0x%x, name_len %i). "
1158 "Please report you saw this "
1159 "message to linux-ntfs-dev@"
1160 "lists.sf.net", vi
->i_ino
,
1161 ni
->type
, ni
->name_len
);
1164 NInoSetCompressed(ni
);
1165 if ((ni
->type
!= AT_DATA
) || (ni
->type
== AT_DATA
&&
1167 ntfs_error(vi
->i_sb
, "Found compressed non-"
1168 "data or named data attribute "
1169 "(mft_no 0x%lx, type 0x%x, "
1170 "name_len %i). Please report "
1171 "you saw this message to "
1172 "linux-ntfs-dev@lists.sf.net",
1173 vi
->i_ino
, ni
->type
,
1177 if (vol
->cluster_size
> 4096) {
1178 ntfs_error(vi
->i_sb
, "Found "
1179 "compressed attribute but "
1180 "compression is disabled due "
1181 "to cluster size (%i) > 4kiB.",
1185 if ((ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
)
1186 != ATTR_IS_COMPRESSED
) {
1187 ntfs_error(vi
->i_sb
, "Found unknown "
1188 "compression method or "
1192 ni
->itype
.compressed
.block_clusters
= 1U <<
1193 ctx
->attr
->data
.non_resident
.
1195 if (ctx
->attr
->data
.non_resident
.compression_unit
!= 4) {
1196 ntfs_error(vi
->i_sb
, "Found "
1197 "nonstandard compression unit "
1198 "(%u instead of 4). Cannot "
1199 "handle this. This might "
1200 "indicate corruption so you "
1201 "should run chkdsk.",
1202 ctx
->attr
->data
.non_resident
.
1207 ni
->itype
.compressed
.block_size
= 1U << (
1208 ctx
->attr
->data
.non_resident
.
1210 vol
->cluster_size_bits
);
1211 ni
->itype
.compressed
.block_size_bits
= ffs(
1212 ni
->itype
.compressed
.block_size
) - 1;
1214 if (ctx
->attr
->flags
& ATTR_IS_ENCRYPTED
) {
1215 if (ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
) {
1216 ntfs_error(vi
->i_sb
, "Found encrypted "
1217 "and compressed data.");
1220 if (NInoMstProtected(ni
)) {
1221 ntfs_error(vi
->i_sb
, "Found mst protected "
1222 "attribute but the attribute "
1223 "is encrypted (mft_no 0x%lx, "
1224 "type 0x%x, name_len %i). "
1225 "Please report you saw this "
1226 "message to linux-ntfs-dev@"
1227 "lists.sf.net", vi
->i_ino
,
1228 ni
->type
, ni
->name_len
);
1231 NInoSetEncrypted(ni
);
1233 if (ctx
->attr
->flags
& ATTR_IS_SPARSE
) {
1234 if (NInoMstProtected(ni
)) {
1235 ntfs_error(vi
->i_sb
, "Found mst protected "
1236 "attribute but the attribute "
1237 "is sparse (mft_no 0x%lx, "
1238 "type 0x%x, name_len %i). "
1239 "Please report you saw this "
1240 "message to linux-ntfs-dev@"
1241 "lists.sf.net", vi
->i_ino
,
1242 ni
->type
, ni
->name_len
);
1247 if (ctx
->attr
->data
.non_resident
.lowest_vcn
) {
1248 ntfs_error(vi
->i_sb
, "First extent of attribute has "
1249 "non-zero lowest_vcn. Inode is "
1250 "corrupt. You should run chkdsk.");
1253 /* Setup all the sizes. */
1254 vi
->i_size
= sle64_to_cpu(
1255 ctx
->attr
->data
.non_resident
.data_size
);
1256 ni
->initialized_size
= sle64_to_cpu(
1257 ctx
->attr
->data
.non_resident
.initialized_size
);
1258 ni
->allocated_size
= sle64_to_cpu(
1259 ctx
->attr
->data
.non_resident
.allocated_size
);
1260 if (NInoCompressed(ni
)) {
1261 ni
->itype
.compressed
.size
= sle64_to_cpu(
1262 ctx
->attr
->data
.non_resident
.
1267 /* Setup the operations for this attribute inode. */
1270 vi
->i_mapping
->a_ops
= &ntfs_aops
;
1272 if (!NInoCompressed(ni
))
1273 vi
->i_blocks
= ni
->allocated_size
>> 9;
1275 vi
->i_blocks
= ni
->itype
.compressed
.size
>> 9;
1278 * Make sure the base inode doesn't go away and attach it to the
1282 ni
->ext
.base_ntfs_ino
= base_ni
;
1283 ni
->nr_extents
= -1;
1285 put_attr_search_ctx(ctx
);
1286 unmap_mft_record(base_ni
);
1288 ntfs_debug("Done.");
1295 put_attr_search_ctx(ctx
);
1296 unmap_mft_record(base_ni
);
1298 ntfs_error(vi
->i_sb
, "Failed with error code %i while reading "
1299 "attribute inode (mft_no 0x%lx, type 0x%x, name_len "
1300 "%i.", -err
, vi
->i_ino
, ni
->type
, ni
->name_len
);
1306 * ntfs_read_inode_mount - special read_inode for mount time use only
1307 * @vi: inode to read
1309 * Read inode FILE_MFT at mount time, only called with super_block lock
1310 * held from within the read_super() code path.
1312 * This function exists because when it is called the page cache for $MFT/$DATA
1313 * is not initialized and hence we cannot get at the contents of mft records
1314 * by calling map_mft_record*().
1316 * Further it needs to cope with the circular references problem, i.e. can't
1317 * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
1318 * we don't know where the other extent mft records are yet and again, because
1319 * we cannot call map_mft_record*() yet. Obviously this applies only when an
1320 * attribute list is actually present in $MFT inode.
1322 * We solve these problems by starting with the $DATA attribute before anything
1323 * else and iterating using lookup_attr($DATA) over all extents. As each extent
1324 * is found, we decompress_mapping_pairs() including the implied
1325 * merge_run_lists(). Each step of the iteration necessarily provides
1326 * sufficient information for the next step to complete.
1328 * This should work but there are two possible pitfalls (see inline comments
1329 * below), but only time will tell if they are real pits or just smoke...
1331 void ntfs_read_inode_mount(struct inode
*vi
)
1333 VCN next_vcn
, last_vcn
, highest_vcn
;
1335 struct super_block
*sb
= vi
->i_sb
;
1336 ntfs_volume
*vol
= NTFS_SB(sb
);
1337 struct buffer_head
*bh
;
1339 MFT_RECORD
*m
= NULL
;
1341 attr_search_context
*ctx
;
1342 unsigned int i
, nr_blocks
;
1345 ntfs_debug("Entering.");
1347 if (vi
->i_ino
!= FILE_MFT
) {
1348 ntfs_error(sb
, "Called for inode 0x%lx but only inode %d "
1349 "allowed.", vi
->i_ino
, FILE_MFT
);
1353 /* Initialize the ntfs specific part of @vi. */
1354 ntfs_init_big_inode(vi
);
1358 /* Setup the data attribute. It is special as it is mst protected. */
1359 NInoSetNonResident(ni
);
1360 NInoSetMstProtected(ni
);
1366 * This sets up our little cheat allowing us to reuse the async io
1367 * completion handler for directories.
1369 ni
->itype
.index
.block_size
= vol
->mft_record_size
;
1370 ni
->itype
.index
.block_size_bits
= vol
->mft_record_size_bits
;
1372 /* Very important! Needed to be able to call map_mft_record*(). */
1375 /* Allocate enough memory to read the first mft record. */
1376 if (vol
->mft_record_size
> 64 * 1024) {
1377 ntfs_error(sb
, "Unsupported mft record size %i (max 64kiB).",
1378 vol
->mft_record_size
);
1381 i
= vol
->mft_record_size
;
1382 if (i
< sb
->s_blocksize
)
1383 i
= sb
->s_blocksize
;
1384 m
= (MFT_RECORD
*)ntfs_malloc_nofs(i
);
1386 ntfs_error(sb
, "Failed to allocate buffer for $MFT record 0.");
1390 /* Determine the first block of the $MFT/$DATA attribute. */
1391 block
= vol
->mft_lcn
<< vol
->cluster_size_bits
>>
1392 sb
->s_blocksize_bits
;
1393 nr_blocks
= vol
->mft_record_size
>> sb
->s_blocksize_bits
;
1397 /* Load $MFT/$DATA's first mft record. */
1398 for (i
= 0; i
< nr_blocks
; i
++) {
1399 bh
= sb_bread(sb
, block
++);
1401 ntfs_error(sb
, "Device read failed.");
1404 memcpy((char*)m
+ (i
<< sb
->s_blocksize_bits
), bh
->b_data
,
1409 /* Apply the mst fixups. */
1410 if (post_read_mst_fixup((NTFS_RECORD
*)m
, vol
->mft_record_size
)) {
1411 /* FIXME: Try to use the $MFTMirr now. */
1412 ntfs_error(sb
, "MST fixup failed. $MFT is corrupt.");
1416 /* Need this to sanity check attribute list references to $MFT. */
1417 ni
->seq_no
= le16_to_cpu(m
->sequence_number
);
1419 /* Provides readpage() and sync_page() for map_mft_record(). */
1420 vi
->i_mapping
->a_ops
= &ntfs_mft_aops
;
1422 ctx
= get_attr_search_ctx(ni
, m
);
1428 /* Find the attribute list attribute if present. */
1429 if (lookup_attr(AT_ATTRIBUTE_LIST
, NULL
, 0, 0, 0, NULL
, 0, ctx
)) {
1430 ATTR_LIST_ENTRY
*al_entry
, *next_al_entry
;
1433 ntfs_debug("Attribute list attribute found in $MFT.");
1434 NInoSetAttrList(ni
);
1435 if (ctx
->attr
->flags
& ATTR_IS_ENCRYPTED
||
1436 ctx
->attr
->flags
& ATTR_COMPRESSION_MASK
||
1437 ctx
->attr
->flags
& ATTR_IS_SPARSE
) {
1438 ntfs_error(sb
, "Attribute list attribute is "
1439 "compressed/encrypted/sparse. Not "
1440 "allowed. $MFT is corrupt. You should "
1444 /* Now allocate memory for the attribute list. */
1445 ni
->attr_list_size
= (u32
)attribute_value_length(ctx
->attr
);
1446 ni
->attr_list
= ntfs_malloc_nofs(ni
->attr_list_size
);
1447 if (!ni
->attr_list
) {
1448 ntfs_error(sb
, "Not enough memory to allocate buffer "
1449 "for attribute list.");
1452 if (ctx
->attr
->non_resident
) {
1453 NInoSetAttrListNonResident(ni
);
1454 if (ctx
->attr
->data
.non_resident
.lowest_vcn
) {
1455 ntfs_error(sb
, "Attribute list has non zero "
1456 "lowest_vcn. $MFT is corrupt. "
1457 "You should run chkdsk.");
1460 /* Setup the run list. */
1461 ni
->attr_list_rl
.rl
= decompress_mapping_pairs(vol
,
1463 if (IS_ERR(ni
->attr_list_rl
.rl
)) {
1464 err
= PTR_ERR(ni
->attr_list_rl
.rl
);
1465 ni
->attr_list_rl
.rl
= NULL
;
1466 ntfs_error(sb
, "Mapping pairs decompression "
1467 "failed with error code %i.",
1471 /* Now load the attribute list. */
1472 if ((err
= load_attribute_list(vol
, &ni
->attr_list_rl
,
1473 ni
->attr_list
, ni
->attr_list_size
,
1474 sle64_to_cpu(ctx
->attr
->data
.
1475 non_resident
.initialized_size
)))) {
1476 ntfs_error(sb
, "Failed to load attribute list "
1477 "attribute with error code %i.",
1481 } else /* if (!ctx.attr->non_resident) */ {
1482 if ((u8
*)ctx
->attr
+ le16_to_cpu(
1483 ctx
->attr
->data
.resident
.value_offset
) +
1485 ctx
->attr
->data
.resident
.value_length
) >
1486 (u8
*)ctx
->mrec
+ vol
->mft_record_size
) {
1487 ntfs_error(sb
, "Corrupt attribute list "
1491 /* Now copy the attribute list. */
1492 memcpy(ni
->attr_list
, (u8
*)ctx
->attr
+ le16_to_cpu(
1493 ctx
->attr
->data
.resident
.value_offset
),
1495 ctx
->attr
->data
.resident
.value_length
));
1497 /* The attribute list is now setup in memory. */
1499 * FIXME: I don't know if this case is actually possible.
1500 * According to logic it is not possible but I have seen too
1501 * many weird things in MS software to rely on logic... Thus we
1502 * perform a manual search and make sure the first $MFT/$DATA
1503 * extent is in the base inode. If it is not we abort with an
1504 * error and if we ever see a report of this error we will need
1505 * to do some magic in order to have the necessary mft record
1506 * loaded and in the right place in the page cache. But
1507 * hopefully logic will prevail and this never happens...
1509 al_entry
= (ATTR_LIST_ENTRY
*)ni
->attr_list
;
1510 al_end
= (u8
*)al_entry
+ ni
->attr_list_size
;
1511 for (;; al_entry
= next_al_entry
) {
1512 /* Out of bounds check. */
1513 if ((u8
*)al_entry
< ni
->attr_list
||
1514 (u8
*)al_entry
> al_end
)
1515 goto em_put_err_out
;
1516 /* Catch the end of the attribute list. */
1517 if ((u8
*)al_entry
== al_end
)
1518 goto em_put_err_out
;
1519 if (!al_entry
->length
)
1520 goto em_put_err_out
;
1521 if ((u8
*)al_entry
+ 6 > al_end
|| (u8
*)al_entry
+
1522 le16_to_cpu(al_entry
->length
) > al_end
)
1523 goto em_put_err_out
;
1524 next_al_entry
= (ATTR_LIST_ENTRY
*)((u8
*)al_entry
+
1525 le16_to_cpu(al_entry
->length
));
1526 if (le32_to_cpu(al_entry
->type
) >
1527 const_le32_to_cpu(AT_DATA
))
1528 goto em_put_err_out
;
1529 if (AT_DATA
!= al_entry
->type
)
1531 /* We want an unnamed attribute. */
1532 if (al_entry
->name_length
)
1533 goto em_put_err_out
;
1534 /* Want the first entry, i.e. lowest_vcn == 0. */
1535 if (al_entry
->lowest_vcn
)
1536 goto em_put_err_out
;
1537 /* First entry has to be in the base mft record. */
1538 if (MREF_LE(al_entry
->mft_reference
) != vi
->i_ino
) {
1539 /* MFT references do not match, logic fails. */
1540 ntfs_error(sb
, "BUG: The first $DATA extent "
1541 "of $MFT is not in the base "
1542 "mft record. Please report "
1543 "you saw this message to "
1544 "linux-ntfs-dev@lists.sf.net");
1547 /* Sequence numbers must match. */
1548 if (MSEQNO_LE(al_entry
->mft_reference
) !=
1550 goto em_put_err_out
;
1551 /* Got it. All is ok. We can stop now. */
1557 reinit_attr_search_ctx(ctx
);
1559 /* Now load all attribute extents. */
1561 next_vcn
= last_vcn
= highest_vcn
= 0;
1562 while (lookup_attr(AT_DATA
, NULL
, 0, 0, next_vcn
, NULL
, 0, ctx
)) {
1563 run_list_element
*nrl
;
1565 /* Cache the current attribute. */
1567 /* $MFT must be non-resident. */
1568 if (!attr
->non_resident
) {
1569 ntfs_error(sb
, "$MFT must be non-resident but a "
1570 "resident extent was found. $MFT is "
1571 "corrupt. Run chkdsk.");
1574 /* $MFT must be uncompressed and unencrypted. */
1575 if (attr
->flags
& ATTR_COMPRESSION_MASK
||
1576 attr
->flags
& ATTR_IS_ENCRYPTED
||
1577 attr
->flags
& ATTR_IS_SPARSE
) {
1578 ntfs_error(sb
, "$MFT must be uncompressed, "
1579 "non-sparse, and unencrypted but a "
1580 "compressed/sparse/encrypted extent "
1581 "was found. $MFT is corrupt. Run "
1586 * Decompress the mapping pairs array of this extent and merge
1587 * the result into the existing run list. No need for locking
1588 * as we have exclusive access to the inode at this time and we
1589 * are a mount in progress task, too.
1591 nrl
= decompress_mapping_pairs(vol
, attr
, ni
->run_list
.rl
);
1593 ntfs_error(sb
, "decompress_mapping_pairs() failed with "
1594 "error code %ld. $MFT is corrupt.",
1598 ni
->run_list
.rl
= nrl
;
1600 /* Are we in the first extent? */
1604 if (attr
->data
.non_resident
.lowest_vcn
) {
1605 ntfs_error(sb
, "First extent of $DATA "
1606 "attribute has non zero "
1607 "lowest_vcn. $MFT is corrupt. "
1608 "You should run chkdsk.");
1611 /* Get the last vcn in the $DATA attribute. */
1612 last_vcn
= sle64_to_cpu(
1613 attr
->data
.non_resident
.allocated_size
)
1614 >> vol
->cluster_size_bits
;
1615 /* Fill in the inode size. */
1616 vi
->i_size
= sle64_to_cpu(
1617 attr
->data
.non_resident
.data_size
);
1618 ni
->initialized_size
= sle64_to_cpu(attr
->data
.
1619 non_resident
.initialized_size
);
1620 ni
->allocated_size
= sle64_to_cpu(
1621 attr
->data
.non_resident
.allocated_size
);
1622 /* Set the number of mft records. */
1623 ll
= vi
->i_size
>> vol
->mft_record_size_bits
;
1625 * Verify the number of mft records does not exceed
1628 if (ll
>= (1ULL << 32)) {
1629 ntfs_error(sb
, "$MFT is too big! Aborting.");
1632 vol
->nr_mft_records
= ll
;
1634 * We have got the first extent of the run_list for
1635 * $MFT which means it is now relatively safe to call
1636 * the normal ntfs_read_inode() function. Thus, take
1637 * us out of the calling chain. Also we need to do this
1638 * now because we need ntfs_read_inode() in place to
1639 * get at subsequent extents.
1641 sb
->s_op
= &ntfs_sops
;
1643 * Complete reading the inode, this will actually
1644 * re-read the mft record for $MFT, this time entering
1645 * it into the page cache with which we complete the
1646 * kick start of the volume. It should be safe to do
1647 * this now as the first extent of $MFT/$DATA is
1648 * already known and we would hope that we don't need
1649 * further extents in order to find the other
1650 * attributes belonging to $MFT. Only time will tell if
1651 * this is really the case. If not we will have to play
1652 * magic at this point, possibly duplicating a lot of
1653 * ntfs_read_inode() at this point. We will need to
1654 * ensure we do enough of its work to be able to call
1655 * ntfs_read_inode() on extents of $MFT/$DATA. But lets
1656 * hope this never happens...
1658 ntfs_read_locked_inode(vi
);
1659 if (is_bad_inode(vi
)) {
1660 ntfs_error(sb
, "ntfs_read_inode() of $MFT "
1661 "failed. BUG or corrupt $MFT. "
1662 "Run chkdsk and if no errors "
1663 "are found, please report you "
1664 "saw this message to "
1665 "linux-ntfs-dev@lists.sf.net");
1666 put_attr_search_ctx(ctx
);
1667 /* Revert to the safe super operations. */
1668 sb
->s_op
= &ntfs_mount_sops
;
1672 * Re-initialize some specifics about $MFT's inode as
1673 * ntfs_read_inode() will have set up the default ones.
1675 /* Set uid and gid to root. */
1676 vi
->i_uid
= vi
->i_gid
= 0;
1677 /* Regular file. No access for anyone. */
1678 vi
->i_mode
= S_IFREG
;
1679 /* No VFS initiated operations allowed for $MFT. */
1680 vi
->i_op
= &ntfs_empty_inode_ops
;
1681 vi
->i_fop
= &ntfs_empty_file_ops
;
1682 /* Put back our special address space operations. */
1683 vi
->i_mapping
->a_ops
= &ntfs_mft_aops
;
1686 /* Get the lowest vcn for the next extent. */
1687 highest_vcn
= sle64_to_cpu(attr
->data
.non_resident
.highest_vcn
);
1688 next_vcn
= highest_vcn
+ 1;
1690 /* Only one extent or error, which we catch below. */
1694 /* Avoid endless loops due to corruption. */
1695 if (next_vcn
< sle64_to_cpu(
1696 attr
->data
.non_resident
.lowest_vcn
)) {
1697 ntfs_error(sb
, "$MFT has corrupt attribute list "
1698 "attribute. Run chkdsk.");
1703 ntfs_error(sb
, "$MFT/$DATA attribute not found. $MFT is "
1704 "corrupt. Run chkdsk.");
1707 if (highest_vcn
&& highest_vcn
!= last_vcn
- 1) {
1708 ntfs_error(sb
, "Failed to load the complete run list "
1709 "for $MFT/$DATA. Driver bug or "
1710 "corrupt $MFT. Run chkdsk.");
1711 ntfs_debug("highest_vcn = 0x%Lx, last_vcn - 1 = 0x%Lx",
1712 (long long)highest_vcn
,
1713 (long long)last_vcn
- 1);
1716 put_attr_search_ctx(ctx
);
1717 ntfs_debug("Done.");
1722 ntfs_error(sb
, "Couldn't find first extent of $DATA attribute in "
1723 "attribute list. $MFT is corrupt. Run chkdsk.");
1725 put_attr_search_ctx(ctx
);
1727 /* Make sure we revert to the safe super operations. */
1728 sb
->s_op
= &ntfs_mount_sops
;
1729 ntfs_error(sb
, "Failed. Marking inode as bad.");
1735 * ntfs_dirty_inode - mark the inode's metadata dirty
1736 * @vi: inode to mark dirty
1738 * This is called from fs/inode.c::__mark_inode_dirty(), when the inode itself
1739 * is being marked dirty. An example is when update_atime() is invoked.
1741 * We mark the inode dirty by setting both the page in which the mft record
1742 * resides and the buffer heads in that page which correspond to the mft record
1743 * dirty. This ensures that the changes will eventually be propagated to disk
1744 * when the inode is set dirty.
1746 * FIXME: Can we do that with the buffer heads? I am not too sure. Because if we
1747 * do that we need to make sure that the kernel will not write out those buffer
1748 * heads or we are screwed as it will write corrupt data to disk. The only way
1749 * a mft record can be written correctly is by mst protecting it, writting it
1750 * synchronously and fast mst deprotecting it. During this period, obviously,
1751 * the mft record must be marked as not uptodate, be locked for writing or
1752 * whatever, so that nobody attempts anything stupid.
1754 * FIXME: Do we need to check that the fs is not mounted read only? And what
1755 * about the inode? Anything else?
1757 * FIXME: As we are only a read only driver it is safe to just return here for
1760 void ntfs_dirty_inode(struct inode
*vi
)
1762 ntfs_debug("Entering for inode 0x%lx.", vi
->i_ino
);
1763 NInoSetDirty(NTFS_I(vi
));
1768 * ntfs_commit_inode - write out a dirty inode
1769 * @ni: inode to write out
1772 int ntfs_commit_inode(ntfs_inode
*ni
)
1774 ntfs_debug("Entering for inode 0x%lx.", ni
->mft_no
);
1780 * ntfs_put_inode - handler for when the inode reference count is decremented
1783 * The VFS calls ntfs_put_inode() every time the inode reference count (i_count)
1784 * is about to be decremented (but before the decrement itself.
1786 * If the inode @vi is a directory with a single reference, we need to put the
1787 * attribute inode for the directory index bitmap, if it is present, otherwise
1788 * the directory inode would remain pinned for ever (or rather until umount()
1791 void ntfs_put_inode(struct inode
*vi
)
1793 if (S_ISDIR(vi
->i_mode
) && (atomic_read(&vi
->i_count
) == 2)) {
1797 if (NInoIndexAllocPresent(ni
) && ni
->itype
.index
.bmp_ino
) {
1798 iput(ni
->itype
.index
.bmp_ino
);
1799 ni
->itype
.index
.bmp_ino
= NULL
;
1805 void __ntfs_clear_inode(ntfs_inode
*ni
)
1809 ntfs_debug("Entering for inode 0x%lx.", ni
->mft_no
);
1810 if (NInoDirty(ni
)) {
1811 err
= ntfs_commit_inode(ni
);
1813 ntfs_error(ni
->vol
->sb
, "Failed to commit dirty "
1814 "inode synchronously.");
1815 // FIXME: Do something!!!
1818 /* Synchronize with ntfs_commit_inode(). */
1819 down(&ni
->mrec_lock
);
1821 if (NInoDirty(ni
)) {
1822 ntfs_error(ni
->vol
->sb
, "Failed to commit dirty inode "
1824 // FIXME: Do something!!!
1826 /* No need to lock at this stage as no one else has a reference. */
1827 if (ni
->nr_extents
> 0) {
1830 // FIXME: Handle dirty case for each extent inode!
1831 for (i
= 0; i
< ni
->nr_extents
; i
++)
1832 ntfs_clear_extent_inode(ni
->ext
.extent_ntfs_inos
[i
]);
1833 kfree(ni
->ext
.extent_ntfs_inos
);
1835 /* Free all allocated memory. */
1836 down_write(&ni
->run_list
.lock
);
1837 if (ni
->run_list
.rl
) {
1838 ntfs_free(ni
->run_list
.rl
);
1839 ni
->run_list
.rl
= NULL
;
1841 up_write(&ni
->run_list
.lock
);
1843 if (ni
->attr_list
) {
1844 ntfs_free(ni
->attr_list
);
1845 ni
->attr_list
= NULL
;
1848 down_write(&ni
->attr_list_rl
.lock
);
1849 if (ni
->attr_list_rl
.rl
) {
1850 ntfs_free(ni
->attr_list_rl
.rl
);
1851 ni
->attr_list_rl
.rl
= NULL
;
1853 up_write(&ni
->attr_list_rl
.lock
);
1855 if (ni
->name_len
&& ni
->name
!= I30
) {
1862 void ntfs_clear_extent_inode(ntfs_inode
*ni
)
1864 __ntfs_clear_inode(ni
);
1867 ntfs_destroy_extent_inode(ni
);
1871 * ntfs_clear_big_inode - clean up the ntfs specific part of an inode
1872 * @vi: vfs inode pending annihilation
1874 * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
1875 * is called, which deallocates all memory belonging to the NTFS specific part
1876 * of the inode and returns.
1878 * If the MFT record is dirty, we commit it before doing anything else.
1880 void ntfs_clear_big_inode(struct inode
*vi
)
1882 ntfs_inode
*ni
= NTFS_I(vi
);
1884 __ntfs_clear_inode(ni
);
1887 /* Release the base inode if we are holding it. */
1888 if (ni
->nr_extents
== -1) {
1889 iput(VFS_I(ni
->ext
.base_ntfs_ino
));
1891 ni
->ext
.base_ntfs_ino
= NULL
;
1898 * ntfs_show_options - show mount options in /proc/mounts
1899 * @sf: seq_file in which to write our mount options
1900 * @mnt: vfs mount whose mount options to display
1902 * Called by the VFS once for each mounted ntfs volume when someone reads
1903 * /proc/mounts in order to display the NTFS specific mount options of each
1904 * mount. The mount options of the vfs mount @mnt are written to the seq file
1905 * @sf and success is returned.
1907 int ntfs_show_options(struct seq_file
*sf
, struct vfsmount
*mnt
)
1909 ntfs_volume
*vol
= NTFS_SB(mnt
->mnt_sb
);
1912 seq_printf(sf
, ",uid=%i", vol
->uid
);
1913 seq_printf(sf
, ",gid=%i", vol
->gid
);
1914 if (vol
->fmask
== vol
->dmask
)
1915 seq_printf(sf
, ",umask=0%o", vol
->fmask
);
1917 seq_printf(sf
, ",fmask=0%o", vol
->fmask
);
1918 seq_printf(sf
, ",dmask=0%o", vol
->dmask
);
1920 seq_printf(sf
, ",nls=%s", vol
->nls_map
->charset
);
1921 if (NVolCaseSensitive(vol
))
1922 seq_printf(sf
, ",case_sensitive");
1923 if (NVolShowSystemFiles(vol
))
1924 seq_printf(sf
, ",show_sys_files");
1925 for (i
= 0; on_errors_arr
[i
].val
; i
++) {
1926 if (on_errors_arr
[i
].val
& vol
->on_errors
)
1927 seq_printf(sf
, ",errors=%s", on_errors_arr
[i
].str
);
1929 seq_printf(sf
, ",mft_zone_multiplier=%i", vol
->mft_zone_multiplier
);
1936 * ntfs_truncate - called when the i_size of an ntfs inode is changed
1937 * @vi: inode for which the i_size was changed
1939 * We don't support i_size changes yet.
1941 * Called with ->i_sem held.
1943 void ntfs_truncate(struct inode
*vi
)
1945 // TODO: Implement...
1946 ntfs_warning(vi
->i_sb
, "Eeek: i_size may have changed! If you see "
1947 "this right after a message from "
1948 "ntfs_{prepare,commit}_{,nonresident_}write() then "
1949 "just ignore it. Otherwise it is bad news.");
1950 // TODO: reset i_size now!
1955 * ntfs_setattr - called from notify_change() when an attribute is being changed
1956 * @dentry: dentry whose attributes to change
1957 * @attr: structure describing the attributes and the changes
1959 * We have to trap VFS attempts to truncate the file described by @dentry as
1960 * soon as possible, because we do not implement changes in i_size yet. So we
1961 * abort all i_size changes here.
1963 * Called with ->i_sem held.
1965 * Basically this is a copy of generic notify_change() and inode_setattr()
1966 * functionality, except we intercept and abort changes in i_size.
1968 int ntfs_setattr(struct dentry
*dentry
, struct iattr
*attr
)
1972 unsigned int ia_valid
= attr
->ia_valid
;
1974 vi
= dentry
->d_inode
;
1976 err
= inode_change_ok(vi
, attr
);
1980 if ((ia_valid
& ATTR_UID
&& attr
->ia_uid
!= vi
->i_uid
) ||
1981 (ia_valid
& ATTR_GID
&& attr
->ia_gid
!= vi
->i_gid
)) {
1982 err
= DQUOT_TRANSFER(vi
, attr
) ? -EDQUOT
: 0;
1989 if (ia_valid
& ATTR_SIZE
) {
1990 ntfs_error(vi
->i_sb
, "Changes in i_size are not supported "
1992 // TODO: Implement...
1993 // err = vmtruncate(vi, attr->ia_size);
1999 if (ia_valid
& ATTR_UID
)
2000 vi
->i_uid
= attr
->ia_uid
;
2001 if (ia_valid
& ATTR_GID
)
2002 vi
->i_gid
= attr
->ia_gid
;
2003 if (ia_valid
& ATTR_ATIME
)
2004 vi
->i_atime
= attr
->ia_atime
;
2005 if (ia_valid
& ATTR_MTIME
)
2006 vi
->i_mtime
= attr
->ia_mtime
;
2007 if (ia_valid
& ATTR_CTIME
)
2008 vi
->i_ctime
= attr
->ia_ctime
;
2009 if (ia_valid
& ATTR_MODE
) {
2010 vi
->i_mode
= attr
->ia_mode
;
2011 if (!in_group_p(vi
->i_gid
) &&
2012 !capable(CAP_FSETID
))
2013 vi
->i_mode
&= ~S_ISGID
;
2015 mark_inode_dirty(vi
);