summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/squashfs.txt225
-rw-r--r--MAINTAINERS7
-rw-r--r--fs/Kconfig52
-rw-r--r--fs/Makefile1
-rw-r--r--fs/squashfs/Makefile8
-rw-r--r--fs/squashfs/block.c274
-rw-r--r--fs/squashfs/cache.c412
-rw-r--r--fs/squashfs/dir.c235
-rw-r--r--fs/squashfs/export.c155
-rw-r--r--fs/squashfs/file.c502
-rw-r--r--fs/squashfs/fragment.c98
-rw-r--r--fs/squashfs/id.c94
-rw-r--r--fs/squashfs/inode.c346
-rw-r--r--fs/squashfs/namei.c242
-rw-r--r--fs/squashfs/squashfs.h90
-rw-r--r--fs/squashfs/squashfs_fs.h381
-rw-r--r--fs/squashfs/squashfs_fs_i.h45
-rw-r--r--fs/squashfs/squashfs_fs_sb.h76
-rw-r--r--fs/squashfs/super.c440
-rw-r--r--fs/squashfs/symlink.c118
-rw-r--r--init/do_mounts_rd.c14
21 files changed, 3815 insertions, 0 deletions
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt
new file mode 100644
index 000000000000..3e79e4a7a392
--- /dev/null
+++ b/Documentation/filesystems/squashfs.txt
@@ -0,0 +1,225 @@
+SQUASHFS 4.0 FILESYSTEM
+=======================
+
+Squashfs is a compressed read-only filesystem for Linux.
+It uses zlib compression to compress files, inodes and directories.
+Inodes in the system are very small and all blocks are packed to minimise
+data overhead. Block sizes greater than 4K are supported up to a maximum
+of 1Mbytes (default block size 128K).
+
+Squashfs is intended for general read-only filesystem use, for archival
+use (i.e. in cases where a .tar.gz file may be used), and in constrained
+block device/memory systems (e.g. embedded systems) where low overhead is
+needed.
+
+Mailing list: squashfs-devel@lists.sourceforge.net
+Web site: www.squashfs.org
+
+1. FILESYSTEM FEATURES
+----------------------
+
+Squashfs filesystem features versus Cramfs:
+
+ Squashfs Cramfs
+
+Max filesystem size: 2^64 16 MiB
+Max file size: ~ 2 TiB 16 MiB
+Max files: unlimited unlimited
+Max directories: unlimited unlimited
+Max entries per directory: unlimited unlimited
+Max block size: 1 MiB 4 KiB
+Metadata compression: yes no
+Directory indexes: yes no
+Sparse file support: yes no
+Tail-end packing (fragments): yes no
+Exportable (NFS etc.): yes no
+Hard link support: yes no
+"." and ".." in readdir: yes no
+Real inode numbers: yes no
+32-bit uids/gids: yes no
+File creation time: yes no
+Xattr and ACL support: no no
+
+Squashfs compresses data, inodes and directories. In addition, inode and
+directory data are highly compacted, and packed on byte boundaries. Each
+compressed inode is on average 8 bytes in length (the exact length varies on
+file type, i.e. regular file, directory, symbolic link, and block/char device
+inodes have different sizes).
+
+2. USING SQUASHFS
+-----------------
+
+As squashfs is a read-only filesystem, the mksquashfs program must be used to
+create populated squashfs filesystems. This and other squashfs utilities
+can be obtained from http://www.squashfs.org. Usage instructions can be
+obtained from this site also.
+
+
+3. SQUASHFS FILESYSTEM DESIGN
+-----------------------------
+
+A squashfs filesystem consists of seven parts, packed together on a byte
+alignment:
+
+ ---------------
+ | superblock |
+ |---------------|
+ | datablocks |
+ | & fragments |
+ |---------------|
+ | inode table |
+ |---------------|
+ | directory |
+ | table |
+ |---------------|
+ | fragment |
+ | table |
+ |---------------|
+ | export |
+ | table |
+ |---------------|
+ | uid/gid |
+ | lookup table |
+ ---------------
+
+Compressed data blocks are written to the filesystem as files are read from
+the source directory, and checked for duplicates. Once all file data has been
+written the completed inode, directory, fragment, export and uid/gid lookup
+tables are written.
+
+3.1 Inodes
+----------
+
+Metadata (inodes and directories) are compressed in 8Kbyte blocks. Each
+compressed block is prefixed by a two byte length, the top bit is set if the
+block is uncompressed. A block will be uncompressed if the -noI option is set,
+or if the compressed block was larger than the uncompressed block.
+
+Inodes are packed into the metadata blocks, and are not aligned to block
+boundaries, therefore inodes overlap compressed blocks. Inodes are identified
+by a 48-bit number which encodes the location of the compressed metadata block
+containing the inode, and the byte offset into that block where the inode is
+placed (<block, offset>).
+
+To maximise compression there are different inodes for each file type
+(regular file, directory, device, etc.), the inode contents and length
+varying with the type.
+
+To further maximise compression, two types of regular file inode and
+directory inode are defined: inodes optimised for frequently occurring
+regular files and directories, and extended types where extra
+information has to be stored.
+
+3.2 Directories
+---------------
+
+Like inodes, directories are packed into compressed metadata blocks, stored
+in a directory table. Directories are accessed using the start address of
+the metablock containing the directory and the offset into the
+decompressed block (<block, offset>).
+
+Directories are organised in a slightly complex way, and are not simply
+a list of file names. The organisation takes advantage of the
+fact that (in most cases) the inodes of the files will be in the same
+compressed metadata block, and therefore, can share the start block.
+Directories are therefore organised in a two level list, a directory
+header containing the shared start block value, and a sequence of directory
+entries, each of which share the shared start block. A new directory header
+is written once/if the inode start block changes. The directory
+header/directory entry list is repeated as many times as necessary.
+
+Directories are sorted, and can contain a directory index to speed up
+file lookup. Directory indexes store one entry per metablock, each entry
+storing the index/filename mapping to the first directory header
+in each metadata block. Directories are sorted in alphabetical order,
+and at lookup the index is scanned linearly looking for the first filename
+alphabetically larger than the filename being looked up. At this point the
+location of the metadata block the filename is in has been found.
+The general idea of the index is ensure only one metadata block needs to be
+decompressed to do a lookup irrespective of the length of the directory.
+This scheme has the advantage that it doesn't require extra memory overhead
+and doesn't require much extra storage on disk.
+
+3.3 File data
+-------------
+
+Regular files consist of a sequence of contiguous compressed blocks, and/or a
+compressed fragment block (tail-end packed block). The compressed size
+of each datablock is stored in a block list contained within the
+file inode.
+
+To speed up access to datablocks when reading 'large' files (256 Mbytes or
+larger), the code implements an index cache that caches the mapping from
+block index to datablock location on disk.
+
+The index cache allows Squashfs to handle large files (up to 1.75 TiB) while
+retaining a simple and space-efficient block list on disk. The cache
+is split into slots, caching up to eight 224 GiB files (128 KiB blocks).
+Larger files use multiple slots, with 1.75 TiB files using all 8 slots.
+The index cache is designed to be memory efficient, and by default uses
+16 KiB.
+
+3.4 Fragment lookup table
+-------------------------
+
+Regular files can contain a fragment index which is mapped to a fragment
+location on disk and compressed size using a fragment lookup table. This
+fragment lookup table is itself stored compressed into metadata blocks.
+A second index table is used to locate these. This second index table for
+speed of access (and because it is small) is read at mount time and cached
+in memory.
+
+3.5 Uid/gid lookup table
+------------------------
+
+For space efficiency regular files store uid and gid indexes, which are
+converted to 32-bit uids/gids using an id look up table. This table is
+stored compressed into metadata blocks. A second index table is used to
+locate these. This second index table for speed of access (and because it
+is small) is read at mount time and cached in memory.
+
+3.6 Export table
+----------------
+
+To enable Squashfs filesystems to be exportable (via NFS etc.) filesystems
+can optionally (disabled with the -no-exports Mksquashfs option) contain
+an inode number to inode disk location lookup table. This is required to
+enable Squashfs to map inode numbers passed in filehandles to the inode
+location on disk, which is necessary when the export code reinstantiates
+expired/flushed inodes.
+
+This table is stored compressed into metadata blocks. A second index table is
+used to locate these. This second index table for speed of access (and because
+it is small) is read at mount time and cached in memory.
+
+
+4. TODOS AND OUTSTANDING ISSUES
+-------------------------------
+
+4.1 Todo list
+-------------
+
+Implement Xattr and ACL support. The Squashfs 4.0 filesystem layout has hooks
+for these but the code has not been written. Once the code has been written
+the existing layout should not require modification.
+
+4.2 Squashfs internal cache
+---------------------------
+
+Blocks in Squashfs are compressed. To avoid repeatedly decompressing
+recently accessed data Squashfs uses two small metadata and fragment caches.
+
+The cache is not used for file datablocks, these are decompressed and cached in
+the page-cache in the normal way. The cache is used to temporarily cache
+fragment and metadata blocks which have been read as a result of a metadata
+(i.e. inode or directory) or fragment access. Because metadata and fragments
+are packed together into blocks (to gain greater compression) the read of a
+particular piece of metadata or fragment will retrieve other metadata/fragments
+which have been packed with it, these because of locality-of-reference may be
+read in the near future. Temporarily caching them ensures they are available
+for near future access without requiring an additional read and decompress.
+
+In the future this internal cache may be replaced with an implementation which
+uses the kernel page cache. Because the page cache operates on page sized
+units this may introduce additional complexity in terms of locking and
+associated race conditions.
diff --git a/MAINTAINERS b/MAINTAINERS
index 57e0309243cc..6f65a269cb17 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4081,6 +4081,13 @@ L: cbe-oss-dev@ozlabs.org
W: http://www.ibm.com/developerworks/power/cell/
S: Supported
+SQUASHFS FILE SYSTEM
+P: Phillip Lougher
+M: phillip@lougher.demon.co.uk
+L: squashfs-devel@lists.sourceforge.net (subscribers-only)
+W: http://squashfs.org.uk
+S: Maintained
+
SRM (Alpha) environment access
P: Jan-Benedict Glaw
M: jbglaw@lug-owl.de
diff --git a/fs/Kconfig b/fs/Kconfig
index 02cff86af1b4..51307b0fdf0f 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -932,6 +932,58 @@ config CRAMFS
If unsure, say N.
+config SQUASHFS
+ tristate "SquashFS 4.0 - Squashed file system support"
+ depends on BLOCK
+ select ZLIB_INFLATE
+ help
+ Saying Y here includes support for SquashFS 4.0 (a Compressed
+ Read-Only File System). Squashfs is a highly compressed read-only
+ filesystem for Linux. It uses zlib compression to compress both
+ files, inodes and directories. Inodes in the system are very small
+ and all blocks are packed to minimise data overhead. Block sizes
+ greater than 4K are supported up to a maximum of 1 Mbytes (default
+ block size 128K). SquashFS 4.0 supports 64 bit filesystems and files
+ (larger than 4GB), full uid/gid information, hard links and
+ timestamps.
+
+ Squashfs is intended for general read-only filesystem use, for
+ archival use (i.e. in cases where a .tar.gz file may be used), and in
+ embedded systems where low overhead is needed. Further information
+ and tools are available from http://squashfs.sourceforge.net.
+
+ If you want to compile this as a module ( = code which can be
+ inserted in and removed from the running kernel whenever you want),
+ say M here and read <file:Documentation/modules.txt>. The module
+ will be called squashfs. Note that the root file system (the one
+ containing the directory /) cannot be compiled as a module.
+
+ If unsure, say N.
+
+config SQUASHFS_EMBEDDED
+
+ bool "Additional option for memory-constrained systems"
+ depends on SQUASHFS
+ default n
+ help
+ Saying Y here allows you to specify cache size.
+
+ If unsure, say N.
+
+config SQUASHFS_FRAGMENT_CACHE_SIZE
+ int "Number of fragments cached" if SQUASHFS_EMBEDDED
+ depends on SQUASHFS
+ default "3"
+ help
+ By default SquashFS caches the last 3 fragments read from
+ the filesystem. Increasing this amount may mean SquashFS
+ has to re-read fragments less often from disk, at the expense
+ of extra system memory. Decreasing this amount will mean
+ SquashFS uses less memory at the expense of extra reads from disk.
+
+ Note there must be at least one cached fragment. Anything
+ much more than three will probably not make much difference.
+
config VXFS_FS
tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)"
depends on BLOCK
diff --git a/fs/Makefile b/fs/Makefile
index bc4e14df1082..38bc735c67ad 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_JBD) += jbd/
obj-$(CONFIG_JBD2) += jbd2/
obj-$(CONFIG_EXT2_FS) += ext2/
obj-$(CONFIG_CRAMFS) += cramfs/
+obj-$(CONFIG_SQUASHFS) += squashfs/
obj-y += ramfs/
obj-$(CONFIG_HUGETLBFS) += hugetlbfs/
obj-$(CONFIG_CODA_FS) += coda/
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
new file mode 100644
index 000000000000..8258cf9a0317
--- /dev/null
+++ b/fs/squashfs/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux squashfs routines.
+#
+
+obj-$(CONFIG_SQUASHFS) += squashfs.o
+squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
+squashfs-y += namei.o super.o symlink.o
+#squashfs-y += squashfs2_0.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
new file mode 100644
index 000000000000..c837dfc2b3c6
--- /dev/null
+++ b/fs/squashfs/block.c
@@ -0,0 +1,274 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * block.c
+ */
+
+/*
+ * This file implements the low-level routines to read and decompress
+ * datablocks and metadata blocks.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+#include <linux/buffer_head.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Read the metadata block length, this is stored in the first two
+ * bytes of the metadata block.
+ */
+static struct buffer_head *get_block_length(struct super_block *sb,
+ u64 *cur_index, int *offset, int *length)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct buffer_head *bh;
+
+ bh = sb_bread(sb, *cur_index);
+ if (bh == NULL)
+ return NULL;
+
+ if (msblk->devblksize - *offset == 1) {
+ *length = (unsigned char) bh->b_data[*offset];
+ put_bh(bh);
+ bh = sb_bread(sb, ++(*cur_index));
+ if (bh == NULL)
+ return NULL;
+ *length |= (unsigned char) bh->b_data[0] << 8;
+ *offset = 1;
+ } else {
+ *length = (unsigned char) bh->b_data[*offset] |
+ (unsigned char) bh->b_data[*offset + 1] << 8;
+ *offset += 2;
+ }
+
+ return bh;
+}
+
+
+/*
+ * Read and decompress a metadata block or datablock. Length is non-zero
+ * if a datablock is being read (the size is stored elsewhere in the
+ * filesystem), otherwise the length is obtained from the first two bytes of
+ * the metadata block. A bit in the length field indicates if the block
+ * is stored uncompressed in the filesystem (usually because compression
+ * generated a larger block - this does occasionally happen with zlib).
+ */
+int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
+ int length, u64 *next_index, int srclength)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct buffer_head **bh;
+ int offset = index & ((1 << msblk->devblksize_log2) - 1);
+ u64 cur_index = index >> msblk->devblksize_log2;
+ int bytes, compressed, b = 0, k = 0, page = 0, avail;
+
+
+ bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1,
+ sizeof(*bh), GFP_KERNEL);
+ if (bh == NULL)
+ return -ENOMEM;
+
+ if (length) {
+ /*
+ * Datablock.
+ */
+ bytes = -offset;
+ compressed = SQUASHFS_COMPRESSED_BLOCK(length);
+ length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
+ if (next_index)
+ *next_index = index + length;
+
+ TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
+ index, compressed ? "" : "un", length, srclength);
+
+ if (length < 0 || length > srclength ||
+ (index + length) > msblk->bytes_used)
+ goto read_failure;
+
+ for (b = 0; bytes < length; b++, cur_index++) {
+ bh[b] = sb_getblk(sb, cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+ ll_rw_block(READ, b, bh);
+ } else {
+ /*
+ * Metadata block.
+ */
+ if ((index + 2) > msblk->bytes_used)
+ goto read_failure;
+
+ bh[0] = get_block_length(sb, &cur_index, &offset, &length);
+ if (bh[0] == NULL)
+ goto read_failure;
+ b = 1;
+
+ bytes = msblk->devblksize - offset;
+ compressed = SQUASHFS_COMPRESSED(length);
+ length = SQUASHFS_COMPRESSED_SIZE(length);
+ if (next_index)
+ *next_index = index + length + 2;
+
+ TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
+ compressed ? "" : "un", length);
+
+ if (length < 0 || length > srclength ||
+ (index + length) > msblk->bytes_used)
+ goto block_release;
+
+ for (; bytes < length; b++) {
+ bh[b] = sb_getblk(sb, ++cur_index);
+ if (bh[b] == NULL)
+ goto block_release;
+ bytes += msblk->devblksize;
+ }
+ ll_rw_block(READ, b - 1, bh + 1);
+ }
+
+ if (compressed) {
+ int zlib_err = 0, zlib_init = 0;
+
+ /*
+ * Uncompress block.
+ */
+
+ mutex_lock(&msblk->read_data_mutex);
+
+ msblk->stream.avail_out = 0;
+ msblk->stream.avail_in = 0;
+
+ bytes = length;
+ do {
+ if (msblk->stream.avail_in == 0 && k < b) {
+ avail = min(bytes, msblk->devblksize - offset);
+ bytes -= avail;
+ wait_on_buffer(bh[k]);
+ if (!buffer_uptodate(bh[k]))
+ goto release_mutex;
+
+ if (avail == 0) {
+ offset = 0;
+ put_bh(bh[k++]);
+ continue;
+ }
+
+ msblk->stream.next_in = bh[k]->b_data + offset;
+ msblk->stream.avail_in = avail;
+ offset = 0;
+ }
+
+ if (msblk->stream.avail_out == 0) {
+ msblk->stream.next_out = buffer[page++];
+ msblk->stream.avail_out = PAGE_CACHE_SIZE;
+ }
+
+ if (!zlib_init) {
+ zlib_err = zlib_inflateInit(&msblk->stream);
+ if (zlib_err != Z_OK) {
+ ERROR("zlib_inflateInit returned"
+ " unexpected result 0x%x,"
+ " srclength %d\n", zlib_err,
+ srclength);
+ goto release_mutex;
+ }
+ zlib_init = 1;
+ }
+
+ zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH);
+
+ if (msblk->stream.avail_in == 0 && k < b)
+ put_bh(bh[k++]);
+ } while (zlib_err == Z_OK);
+
+ if (zlib_err != Z_STREAM_END) {
+ ERROR("zlib_inflate returned unexpected result"
+ " 0x%x, srclength %d, avail_in %d,"
+ " avail_out %d\n", zlib_err, srclength,
+ msblk->stream.avail_in,
+ msblk->stream.avail_out);
+ goto release_mutex;
+ }
+
+ zlib_err = zlib_inflateEnd(&msblk->stream);
+ if (zlib_err != Z_OK) {
+ ERROR("zlib_inflateEnd returned unexpected result 0x%x,"
+ " srclength %d\n", zlib_err, srclength);
+ goto release_mutex;
+ }
+ length = msblk->stream.total_out;
+ mutex_unlock(&msblk->read_data_mutex);
+ } else {
+ /*
+ * Block is uncompressed.
+ */
+ int i, in, pg_offset = 0;
+
+ for (i = 0; i < b; i++) {
+ wait_on_buffer(bh[i]);
+ if (!buffer_uptodate(bh[i]))
+ goto block_release;
+ }
+
+ for (bytes = length; k < b; k++) {
+ in = min(bytes, msblk->devblksize - offset);
+ bytes -= in;
+ while (in) {
+ if (pg_offset == PAGE_CACHE_SIZE) {
+ page++;
+ pg_offset = 0;
+ }
+ avail = min_t(int, in, PAGE_CACHE_SIZE -
+ pg_offset);
+ memcpy(buffer[page] + pg_offset,
+ bh[k]->b_data + offset, avail);
+ in -= avail;
+ pg_offset += avail;
+ offset += avail;
+ }
+ offset = 0;
+ put_bh(bh[k]);
+ }
+ }
+
+ kfree(bh);
+ return length;
+
+release_mutex:
+ mutex_unlock(&msblk->read_data_mutex);
+
+block_release:
+ for (; k < b; k++)
+ put_bh(bh[k]);
+
+read_failure:
+ ERROR("sb_bread failed reading block 0x%llx\n", cur_index);
+ kfree(bh);
+ return -EIO;
+}
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
new file mode 100644
index 000000000000..f29eda16d25e
--- /dev/null
+++ b/fs/squashfs/cache.c
@@ -0,0 +1,412 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * cache.c
+ */
+
+/*
+ * Blocks in Squashfs are compressed. To avoid repeatedly decompressing
+ * recently accessed data Squashfs uses two small metadata and fragment caches.
+ *
+ * This file implements a generic cache implementation used for both caches,
+ * plus functions layered ontop of the generic cache implementation to
+ * access the metadata and fragment caches.
+ *
+ * To avoid out of memory and fragmentation isssues with vmalloc the cache
+ * uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
+ *
+ * It should be noted that the cache is not used for file datablocks, these
+ * are decompressed and cached in the page-cache in the normal way. The
+ * cache is only used to temporarily cache fragment and metadata blocks
+ * which have been read as as a result of a metadata (i.e. inode or
+ * directory) or fragment access. Because metadata and fragments are packed
+ * together into blocks (to gain greater compression) the read of a particular
+ * piece of metadata or fragment will retrieve other metadata/fragments which
+ * have been packed with it, these because of locality-of-reference may be read
+ * in the near future. Temporarily caching them ensures they are available for
+ * near future access without requiring an additional read and decompress.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/zlib.h>
+#include <linux/pagemap.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Look-up block in cache, and increment usage count. If not in cache, read
+ * and decompress it from disk.
+ */
+struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb,
+ struct squashfs_cache *cache, u64 block, int length)
+{
+ int i, n;
+ struct squashfs_cache_entry *entry;
+
+ spin_lock(&cache->lock);
+
+ while (1) {
+ for (i = 0; i < cache->entries; i++)
+ if (cache->entry[i].block == block)
+ break;
+
+ if (i == cache->entries) {
+ /*
+ * Block not in cache, if all cache entries are used
+ * go to sleep waiting for one to become available.
+ */
+ if (cache->unused == 0) {
+ cache->num_waiters++;
+ spin_unlock(&cache->lock);
+ wait_event(cache->wait_queue, cache->unused);
+ spin_lock(&cache->lock);
+ cache->num_waiters--;
+ continue;
+ }
+
+ /*
+ * At least one unused cache entry. A simple
+ * round-robin strategy is used to choose the entry to
+ * be evicted from the cache.
+ */
+ i = cache->next_blk;
+ for (n = 0; n < cache->entries; n++) {
+ if (cache->entry[i].refcount == 0)
+ break;
+ i = (i + 1) % cache->entries;
+ }
+
+ cache->next_blk = (i + 1) % cache->entries;
+ entry = &cache->entry[i];
+
+ /*
+ * Initialise choosen cache entry, and fill it in from
+ * disk.
+ */
+ cache->unused--;
+ entry->block = block;
+ entry->refcount = 1;
+ entry->pending = 1;
+ entry->num_waiters = 0;
+ entry->error = 0;
+ spin_unlock(&cache->lock);
+
+ entry->length = squashfs_read_data(sb, entry->data,
+ block, length, &entry->next_index,
+ cache->block_size);
+
+ spin_lock(&cache->lock);
+
+ if (entry->length < 0)
+ entry->error = entry->length;
+
+ entry->pending = 0;
+
+ /*
+ * While filling this entry one or more other processes
+ * have looked it up in the cache, and have slept
+ * waiting for it to become available.
+ */
+ if (entry->num_waiters) {
+ spin_unlock(&cache->lock);
+ wake_up_all(&entry->wait_queue);
+ } else
+ spin_unlock(&cache->lock);
+
+ goto out;
+ }
+
+ /*
+ * Block already in cache. Increment refcount so it doesn't
+ * get reused until we're finished with it, if it was
+ * previously unused there's one less cache entry available
+ * for reuse.
+ */
+ entry = &cache->entry[i];
+ if (entry->refcount == 0)
+ cache->unused--;
+ entry->refcount++;
+
+ /*
+ * If the entry is currently being filled in by another process
+ * go to sleep waiting for it to become available.
+ */
+ if (entry->pending) {
+ entry->num_waiters++;
+ spin_unlock(&cache->lock);
+ wait_event(entry->wait_queue, !entry->pending);
+ } else
+ spin_unlock(&cache->lock);
+
+ goto out;
+ }
+
+out:
+ TRACE("Got %s %d, start block %lld, refcount %d, error %d\n",
+ cache->name, i, entry->block, entry->refcount, entry->error);
+
+ if (entry->error)
+ ERROR("Unable to read %s cache entry [%llx]\n", cache->name,
+ block);
+ return entry;
+}
+
+
+/*
+ * Release cache entry, once usage count is zero it can be reused.
+ */
+void squashfs_cache_put(struct squashfs_cache_entry *entry)
+{
+ struct squashfs_cache *cache = entry->cache;
+
+ spin_lock(&cache->lock);
+ entry->refcount--;
+ if (entry->refcount == 0) {
+ cache->unused++;
+ /*
+ * If there's any processes waiting for a block to become
+ * available, wake one up.
+ */
+ if (cache->num_waiters) {
+ spin_unlock(&cache->lock);
+ wake_up(&cache->wait_queue);
+ return;
+ }
+ }
+ spin_unlock(&cache->lock);
+}
+
+/*
+ * Delete cache reclaiming all kmalloced buffers.
+ */
+void squashfs_cache_delete(struct squashfs_cache *cache)
+{
+ int i, j;
+
+ if (cache == NULL)
+ return;
+
+ for (i = 0; i < cache->entries; i++) {
+ if (cache->entry[i].data) {
+ for (j = 0; j < cache->pages; j++)
+ kfree(cache->entry[i].data[j]);
+ kfree(cache->entry[i].data);
+ }
+ }
+
+ kfree(cache->entry);
+ kfree(cache);
+}
+
+
+/*
+ * Initialise cache allocating the specified number of entries, each of
+ * size block_size. To avoid vmalloc fragmentation issues each entry
+ * is allocated as a sequence of kmalloced PAGE_CACHE_SIZE buffers.
+ */
+struct squashfs_cache *squashfs_cache_init(char *name, int entries,
+ int block_size)
+{
+ int i, j;
+ struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+
+ if (cache == NULL) {
+ ERROR("Failed to allocate %s cache\n", name);
+ return NULL;
+ }
+
+ cache->entry = kcalloc(entries, sizeof(*(cache->entry)), GFP_KERNEL);
+ if (cache->entry == NULL) {
+ ERROR("Failed to allocate %s cache\n", name);
+ goto cleanup;
+ }
+
+ cache->next_blk = 0;
+ cache->unused = entries;
+ cache->entries = entries;
+ cache->block_size = block_size;
+ cache->pages = block_size >> PAGE_CACHE_SHIFT;
+ cache->name = name;
+ cache->num_waiters = 0;
+ spin_lock_init(&cache->lock);
+ init_waitqueue_head(&cache->wait_queue);
+
+ for (i = 0; i < entries; i++) {
+ struct squashfs_cache_entry *entry = &cache->entry[i];
+
+ init_waitqueue_head(&cache->entry[i].wait_queue);
+ entry->cache = cache;
+ entry->block = SQUASHFS_INVALID_BLK;
+ entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL);
+ if (entry->data == NULL) {
+ ERROR("Failed to allocate %s cache entry\n", name);
+ goto cleanup;
+ }
+
+ for (j = 0; j < cache->pages; j++) {
+ entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ if (entry->data[j] == NULL) {
+ ERROR("Failed to allocate %s buffer\n", name);
+ goto cleanup;
+ }
+ }
+ }
+
+ return cache;
+
+cleanup:
+ squashfs_cache_delete(cache);
+ return NULL;
+}
+
+
+/*
+ * Copy upto length bytes from cache entry to buffer starting at offset bytes
+ * into the cache entry. If there's not length bytes then copy the number of
+ * bytes available. In all cases return the number of bytes copied.
+ */
+int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry,
+ int offset, int length)
+{
+ int remaining = length;
+
+ if (length == 0)
+ return 0;
+ else if (buffer == NULL)
+ return min(length, entry->length - offset);
+
+ while (offset < entry->length) {
+ void *buff = entry->data[offset / PAGE_CACHE_SIZE]
+ + (offset % PAGE_CACHE_SIZE);
+ int bytes = min_t(int, entry->length - offset,
+ PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE));
+
+ if (bytes >= remaining) {
+ memcpy(buffer, buff, remaining);
+ remaining = 0;
+ break;
+ }
+
+ memcpy(buffer, buff, bytes);
+ buffer += bytes;
+ remaining -= bytes;
+ offset += bytes;
+ }
+
+ return length - remaining;
+}
+
+
+/*
+ * Read length bytes from metadata position <block, offset> (block is the
+ * start of the compressed block on disk, and offset is the offset into
+ * the block once decompressed). Data is packed into consecutive blocks,
+ * and length bytes may require reading more than one block.
+ */
+int squashfs_read_metadata(struct super_block *sb, void *buffer,
+ u64 *block, int *offset, int length)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int bytes, copied = length;
+ struct squashfs_cache_entry *entry;
+
+ TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset);
+
+ while (length) {
+ entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
+ if (entry->error)
+ return entry->error;
+ else if (*offset >= entry->length)
+ return -EIO;
+
+ bytes = squashfs_copy_data(buffer, entry, *offset, length);
+ if (buffer)
+ buffer += bytes;
+ length -= bytes;
+ *offset += bytes;
+
+ if (*offset == entry->length) {
+ *block = entry->next_index;
+ *offset = 0;
+ }
+
+ squashfs_cache_put(entry);
+ }
+
+ return copied;
+}
+
+
+/*
+ * Look-up in the fragmment cache the fragment located at <start_block> in the
+ * filesystem. If necessary read and decompress it from disk.
+ */
+struct squashfs_cache_entry *squashfs_get_fragment(struct super_block *sb,
+ u64 start_block, int length)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+
+ return squashfs_cache_get(sb, msblk->fragment_cache, start_block,
+ length);
+}
+
+
+/*
+ * Read and decompress the datablock located at <start_block> in the
+ * filesystem. The cache is used here to avoid duplicating locking and
+ * read/decompress code.
+ */
+struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
+ u64 start_block, int length)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+
+ return squashfs_cache_get(sb, msblk->read_page, start_block, length);
+}
+
+
+/*
+ * Read a filesystem table (uncompressed sequence of bytes) from disk
+ */
+int squashfs_read_table(struct super_block *sb, void *buffer, u64 block,
+ int length)
+{
+ int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ int i, res;
+ void **data = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+ if (data == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
+ data[i] = buffer;
+ res = squashfs_read_data(sb, data, block, length |
+ SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length);
+ kfree(data);
+ return res;
+}
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
new file mode 100644
index 000000000000..566b0eaed868
--- /dev/null
+++ b/fs/squashfs/dir.c
@@ -0,0 +1,235 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * dir.c
+ */
+
+/*
+ * This file implements code to read directories from disk.
+ *
+ * See namei.c for a description of directory organisation on disk.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+static const unsigned char squashfs_filetype_table[] = {
+ DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK
+};
+
+/*
+ * Lookup offset (f_pos) in the directory index, returning the
+ * metadata block containing it.
+ *
+ * If we get an error reading the index then return the part of the index
+ * (if any) we have managed to read - the index isn't essential, just
+ * quicker.
+ */
+static int get_dir_index_using_offset(struct super_block *sb,
+ u64 *next_block, int *next_offset, u64 index_start, int index_offset,
+ int i_count, u64 f_pos)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int err, i, index, length = 0;
+ struct squashfs_dir_index dir_index;
+
+ TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n",
+ i_count, f_pos);
+
+ /*
+ * Translate from external f_pos to the internal f_pos. This
+ * is offset by 3 because we invent "." and ".." entries which are
+ * not actually stored in the directory.
+ */
+ if (f_pos < 3)
+ return f_pos;
+ f_pos -= 3;
+
+ for (i = 0; i < i_count; i++) {
+ err = squashfs_read_metadata(sb, &dir_index, &index_start,
+ &index_offset, sizeof(dir_index));
+ if (err < 0)
+ break;
+
+ index = le32_to_cpu(dir_index.index);
+ if (index > f_pos)
+ /*
+ * Found the index we're looking for.
+ */
+ break;
+
+ err = squashfs_read_metadata(sb, NULL, &index_start,
+ &index_offset, le32_to_cpu(dir_index.size) + 1);
+ if (err < 0)
+ break;
+
+ length = index;
+ *next_block = le32_to_cpu(dir_index.start_block) +
+ msblk->directory_table;
+ }
+
+ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE;
+
+ /*
+ * Translate back from internal f_pos to external f_pos.
+ */
+ return length + 3;
+}
+
+
+static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ u64 block = squashfs_i(inode)->start + msblk->directory_table;
+ int offset = squashfs_i(inode)->offset, length = 0, dir_count, size,
+ type, err;
+ unsigned int inode_number;
+ struct squashfs_dir_header dirh;
+ struct squashfs_dir_entry *dire;
+
+ TRACE("Entered squashfs_readdir [%llx:%x]\n", block, offset);
+
+ dire = kmalloc(sizeof(*dire) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL);
+ if (dire == NULL) {
+ ERROR("Failed to allocate squashfs_dir_entry\n");
+ goto finish;
+ }
+
+ /*
+ * Return "." and ".." entries as the first two filenames in the
+ * directory. To maximise compression these two entries are not
+ * stored in the directory, and so we invent them here.
+ *
+ * It also means that the external f_pos is offset by 3 from the
+ * on-disk directory f_pos.
+ */
+ while (file->f_pos < 3) {
+ char *name;
+ int i_ino;
+
+ if (file->f_pos == 0) {
+ name = ".";
+ size = 1;
+ i_ino = inode->i_ino;
+ } else {
+ name = "..";
+ size = 2;
+ i_ino = squashfs_i(inode)->parent;
+ }
+
+ TRACE("Calling filldir(%p, %s, %d, %lld, %d, %d)\n",
+ dirent, name, size, file->f_pos, i_ino,
+ squashfs_filetype_table[1]);
+
+ if (filldir(dirent, name, size, file->f_pos, i_ino,
+ squashfs_filetype_table[1]) < 0) {
+ TRACE("Filldir returned less than 0\n");
+ goto finish;
+ }
+
+ file->f_pos += size;
+ }
+
+ length = get_dir_index_using_offset(inode->i_sb, &block, &offset,
+ squashfs_i(inode)->dir_idx_start,
+ squashfs_i(inode)->dir_idx_offset,
+ squashfs_i(inode)->dir_idx_cnt,
+ file->f_pos);
+
+ while (length < i_size_read(inode)) {
+ /*
+ * Read directory header
+ */
+ err = squashfs_read_metadata(inode->i_sb, &dirh, &block,
+ &offset, sizeof(dirh));
+ if (err < 0)
+ goto failed_read;
+
+ length += sizeof(dirh);
+
+ dir_count = le32_to_cpu(dirh.count) + 1;
+ while (dir_count--) {
+ /*
+ * Read directory entry.
+ */
+ err = squashfs_read_metadata(inode->i_sb, dire, &block,
+ &offset, sizeof(*dire));
+ if (err < 0)
+ goto failed_read;
+
+ size = le16_to_cpu(dire->size) + 1;
+
+ err = squashfs_read_metadata(inode->i_sb, dire->name,
+ &block, &offset, size);
+ if (err < 0)
+ goto failed_read;
+
+ length += sizeof(*dire) + size;
+
+ if (file->f_pos >= length)
+ continue;
+
+ dire->name[size] = '\0';
+ inode_number = le32_to_cpu(dirh.inode_number) +
+ ((short) le16_to_cpu(dire->inode_number));
+ type = le16_to_cpu(dire->type);
+
+ TRACE("Calling filldir(%p, %s, %d, %lld, %x:%x, %d, %d)"
+ "\n", dirent, dire->name, size,
+ file->f_pos,
+ le32_to_cpu(dirh.start_block),
+ le16_to_cpu(dire->offset),
+ inode_number,
+ squashfs_filetype_table[type]);
+
+ if (filldir(dirent, dire->name, size, file->f_pos,
+ inode_number,
+ squashfs_filetype_table[type]) < 0) {
+ TRACE("Filldir returned less than 0\n");
+ goto finish;
+ }
+
+ file->f_pos = length;
+ }
+ }
+
+finish:
+ kfree(dire);
+ return 0;
+
+failed_read:
+ ERROR("Unable to read directory block [%llx:%x]\n", block, offset);
+ kfree(dire);
+ return 0;
+}
+
+
+const struct file_operations squashfs_dir_ops = {
+ .read = generic_read_dir,
+ .readdir = squashfs_readdir
+};
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
new file mode 100644
index 000000000000..69e971d5ddc1
--- /dev/null
+++ b/fs/squashfs/export.c
@@ -0,0 +1,155 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * export.c
+ */
+
+/*
+ * This file implements code to make Squashfs filesystems exportable (NFS etc.)
+ *
+ * The export code uses an inode lookup table to map inode numbers passed in
+ * filehandles to an inode location on disk. This table is stored compressed
+ * into metadata blocks. A second index table is used to locate these. This
+ * second index table for speed of access (and because it is small) is read at
+ * mount time and cached in memory.
+ *
+ * The inode lookup table is used only by the export code, inode disk
+ * locations are directly encoded in directories, enabling direct access
+ * without an intermediate lookup for all operations except the export ops.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/dcache.h>
+#include <linux/exportfs.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Look-up inode number (ino) in table, returning the inode location.
+ */
+static long long squashfs_inode_lookup(struct super_block *sb, int ino_num)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int blk = SQUASHFS_LOOKUP_BLOCK(ino_num - 1);
+ int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino_num - 1);
+ u64 start = le64_to_cpu(msblk->inode_lookup_table[blk]);
+ __le64 ino;
+ int err;
+
+ TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino_num);
+
+ err = squashfs_read_metadata(sb, &ino, &start, &offset, sizeof(ino));
+ if (err < 0)
+ return err;
+
+ TRACE("squashfs_inode_lookup, inode = 0x%llx\n",
+ (u64) le64_to_cpu(ino));
+
+ return le64_to_cpu(ino);
+}
+
+
+static struct dentry *squashfs_export_iget(struct super_block *sb,
+ unsigned int ino_num)
+{
+ long long ino;
+ struct dentry *dentry = ERR_PTR(-ENOENT);
+
+ TRACE("Entered squashfs_export_iget\n");
+
+ ino = squashfs_inode_lookup(sb, ino_num);
+ if (ino >= 0)
+ dentry = d_obtain_alias(squashfs_iget(sb, ino, ino_num));
+
+ return dentry;
+}
+
+
+static struct dentry *squashfs_fh_to_dentry(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ if ((fh_type != FILEID_INO32_GEN && fh_type != FILEID_INO32_GEN_PARENT)
+ || fh_len < 2)
+ return NULL;
+
+ return squashfs_export_iget(sb, fid->i32.ino);
+}
+
+
+static struct dentry *squashfs_fh_to_parent(struct super_block *sb,
+ struct fid *fid, int fh_len, int fh_type)
+{
+ if (fh_type != FILEID_INO32_GEN_PARENT || fh_len < 4)
+ return NULL;
+
+ return squashfs_export_iget(sb, fid->i32.parent_ino);
+}
+
+
+static struct dentry *squashfs_get_parent(struct dentry *child)
+{
+ struct inode *inode = child->d_inode;
+ unsigned int parent_ino = squashfs_i(inode)->parent;
+
+ return squashfs_export_iget(inode->i_sb, parent_ino);
+}
+
+
+/*
+ * Read uncompressed inode lookup table indexes off disk into memory
+ */
+__le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
+ u64 lookup_table_start, unsigned int inodes)
+{
+ unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes);
+ __le64 *inode_lookup_table;
+ int err;
+
+ TRACE("In read_inode_lookup_table, length %d\n", length);
+
+ /* Allocate inode lookup table indexes */
+ inode_lookup_table = kmalloc(length, GFP_KERNEL);
+ if (inode_lookup_table == NULL) {
+ ERROR("Failed to allocate inode lookup table\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ err = squashfs_read_table(sb, inode_lookup_table, lookup_table_start,
+ length);
+ if (err < 0) {
+ ERROR("unable to read inode lookup table\n");
+ kfree(inode_lookup_table);
+ return ERR_PTR(err);
+ }
+
+ return inode_lookup_table;
+}
+
+
+const struct export_operations squashfs_export_ops = {
+ .fh_to_dentry = squashfs_fh_to_dentry,
+ .fh_to_parent = squashfs_fh_to_parent,
+ .get_parent = squashfs_get_parent
+};
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
new file mode 100644
index 000000000000..717767d831df
--- /dev/null
+++ b/fs/squashfs/file.c
@@ -0,0 +1,502 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * file.c
+ */
+
+/*
+ * This file contains code for handling regular files. A regular file
+ * consists of a sequence of contiguous compressed blocks, and/or a
+ * compressed fragment block (tail-end packed block). The compressed size
+ * of each datablock is stored in a block list contained within the
+ * file inode (itself stored in one or more compressed metadata blocks).
+ *
+ * To speed up access to datablocks when reading 'large' files (256 Mbytes or
+ * larger), the code implements an index cache that caches the mapping from
+ * block index to datablock location on disk.
+ *
+ * The index cache allows Squashfs to handle large files (up to 1.75 TiB) while
+ * retaining a simple and space-efficient block list on disk. The cache
+ * is split into slots, caching up to eight 224 GiB files (128 KiB blocks).
+ * Larger files use multiple slots, with 1.75 TiB files using all 8 slots.
+ * The index cache is designed to be memory efficient, and by default uses
+ * 16 KiB.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Locate cache slot in range [offset, index] for specified inode. If
+ * there's more than one return the slot closest to index.
+ */
+static struct meta_index *locate_meta_index(struct inode *inode, int offset,
+ int index)
+{
+ struct meta_index *meta = NULL;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int i;
+
+ mutex_lock(&msblk->meta_index_mutex);
+
+ TRACE("locate_meta_index: index %d, offset %d\n", index, offset);
+
+ if (msblk->meta_index == NULL)
+ goto not_allocated;
+
+ for (i = 0; i < SQUASHFS_META_SLOTS; i++) {
+ if (msblk->meta_index[i].inode_number == inode->i_ino &&
+ msblk->meta_index[i].offset >= offset &&
+ msblk->meta_index[i].offset <= index &&
+ msblk->meta_index[i].locked == 0) {
+ TRACE("locate_meta_index: entry %d, offset %d\n", i,
+ msblk->meta_index[i].offset);
+ meta = &msblk->meta_index[i];
+ offset = meta->offset;
+ }
+ }
+
+ if (meta)
+ meta->locked = 1;
+
+not_allocated:
+ mutex_unlock(&msblk->meta_index_mutex);
+
+ return meta;
+}
+
+
+/*
+ * Find and initialise an empty cache slot for index offset.
+ */
+static struct meta_index *empty_meta_index(struct inode *inode, int offset,
+ int skip)
+{
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ struct meta_index *meta = NULL;
+ int i;
+
+ mutex_lock(&msblk->meta_index_mutex);
+
+ TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip);
+
+ if (msblk->meta_index == NULL) {
+ /*
+ * First time cache index has been used, allocate and
+ * initialise. The cache index could be allocated at
+ * mount time but doing it here means it is allocated only
+ * if a 'large' file is read.
+ */
+ msblk->meta_index = kcalloc(SQUASHFS_META_SLOTS,
+ sizeof(*(msblk->meta_index)), GFP_KERNEL);
+ if (msblk->meta_index == NULL) {
+ ERROR("Failed to allocate meta_index\n");
+ goto failed;
+ }
+ for (i = 0; i < SQUASHFS_META_SLOTS; i++) {
+ msblk->meta_index[i].inode_number = 0;
+ msblk->meta_index[i].locked = 0;
+ }
+ msblk->next_meta_index = 0;
+ }
+
+ for (i = SQUASHFS_META_SLOTS; i &&
+ msblk->meta_index[msblk->next_meta_index].locked; i--)
+ msblk->next_meta_index = (msblk->next_meta_index + 1) %
+ SQUASHFS_META_SLOTS;
+
+ if (i == 0) {
+ TRACE("empty_meta_index: failed!\n");
+ goto failed;
+ }
+
+ TRACE("empty_meta_index: returned meta entry %d, %p\n",
+ msblk->next_meta_index,
+ &msblk->meta_index[msblk->next_meta_index]);
+
+ meta = &msblk->meta_index[msblk->next_meta_index];
+ msblk->next_meta_index = (msblk->next_meta_index + 1) %
+ SQUASHFS_META_SLOTS;
+
+ meta->inode_number = inode->i_ino;
+ meta->offset = offset;
+ meta->skip = skip;
+ meta->entries = 0;
+ meta->locked = 1;
+
+failed:
+ mutex_unlock(&msblk->meta_index_mutex);
+ return meta;
+}
+
+
+static void release_meta_index(struct inode *inode, struct meta_index *meta)
+{
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ mutex_lock(&msblk->meta_index_mutex);
+ meta->locked = 0;
+ mutex_unlock(&msblk->meta_index_mutex);
+}
+
+
+/*
+ * Read the next n blocks from the block list, starting from
+ * metadata block <start_block, offset>.
+ */
+static long long read_indexes(struct super_block *sb, int n,
+ u64 *start_block, int *offset)
+{
+ int err, i;
+ long long block = 0;
+ __le32 *blist = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+
+ if (blist == NULL) {
+ ERROR("read_indexes: Failed to allocate block_list\n");
+ return -ENOMEM;
+ }
+
+ while (n) {
+ int blocks = min_t(int, n, PAGE_CACHE_SIZE >> 2);
+
+ err = squashfs_read_metadata(sb, blist, start_block,
+ offset, blocks << 2);
+ if (err < 0) {
+ ERROR("read_indexes: reading block [%llx:%x]\n",
+ *start_block, *offset);
+ goto failure;
+ }
+
+ for (i = 0; i < blocks; i++) {
+ int size = le32_to_cpu(blist[i]);
+ block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size);
+ }
+ n -= blocks;
+ }
+
+ kfree(blist);
+ return block;
+
+failure:
+ kfree(blist);
+ return err;
+}
+
+
+/*
+ * Each cache index slot has SQUASHFS_META_ENTRIES, each of which
+ * can cache one index -> datablock/blocklist-block mapping. We wish
+ * to distribute these over the length of the file, entry[0] maps index x,
+ * entry[1] maps index x + skip, entry[2] maps index x + 2 * skip, and so on.
+ * The larger the file, the greater the skip factor. The skip factor is
+ * limited to the size of the metadata cache (SQUASHFS_CACHED_BLKS) to ensure
+ * the number of metadata blocks that need to be read fits into the cache.
+ * If the skip factor is limited in this way then the file will use multiple
+ * slots.
+ */
+static inline int calculate_skip(int blocks)
+{
+ int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
+ * SQUASHFS_META_INDEXES);
+ return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
+}
+
+
+/*
+ * Search and grow the index cache for the specified inode, returning the
+ * on-disk locations of the datablock and block list metadata block
+ * <index_block, index_offset> for index (scaled to nearest cache index).
+ */
+static int fill_meta_index(struct inode *inode, int index,
+ u64 *index_block, int *index_offset, u64 *data_block)
+{
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int skip = calculate_skip(i_size_read(inode) >> msblk->block_log);
+ int offset = 0;
+ struct meta_index *meta;
+ struct meta_entry *meta_entry;
+ u64 cur_index_block = squashfs_i(inode)->block_list_start;
+ int cur_offset = squashfs_i(inode)->offset;
+ u64 cur_data_block = squashfs_i(inode)->start;
+ int err, i;
+
+ /*
+ * Scale index to cache index (cache slot entry)
+ */
+ index /= SQUASHFS_META_INDEXES * skip;
+
+ while (offset < index) {
+ meta = locate_meta_index(inode, offset + 1, index);
+
+ if (meta == NULL) {
+ meta = empty_meta_index(inode, offset + 1, skip);
+ if (meta == NULL)
+ goto all_done;
+ } else {
+ offset = index < meta->offset + meta->entries ? index :
+ meta->offset + meta->entries - 1;
+ meta_entry = &meta->meta_entry[offset - meta->offset];
+ cur_index_block = meta_entry->index_block +
+ msblk->inode_table;
+ cur_offset = meta_entry->offset;
+ cur_data_block = meta_entry->data_block;
+ TRACE("get_meta_index: offset %d, meta->offset %d, "
+ "meta->entries %d\n", offset, meta->offset,
+ meta->entries);
+ TRACE("get_meta_index: index_block 0x%llx, offset 0x%x"
+ " data_block 0x%llx\n", cur_index_block,
+ cur_offset, cur_data_block);
+ }
+
+ /*
+ * If necessary grow cache slot by reading block list. Cache
+ * slot is extended up to index or to the end of the slot, in
+ * which case further slots will be used.
+ */
+ for (i = meta->offset + meta->entries; i <= index &&
+ i < meta->offset + SQUASHFS_META_ENTRIES; i++) {
+ int blocks = skip * SQUASHFS_META_INDEXES;
+ long long res = read_indexes(inode->i_sb, blocks,
+ &cur_index_block, &cur_offset);
+
+ if (res < 0) {
+ if (meta->entries == 0)
+ /*
+ * Don't leave an empty slot on read
+ * error allocated to this inode...
+ */
+ meta->inode_number = 0;
+ err = res;
+ goto failed;
+ }
+
+ cur_data_block += res;
+ meta_entry = &meta->meta_entry[i - meta->offset];
+ meta_entry->index_block = cur_index_block -
+ msblk->inode_table;
+ meta_entry->offset = cur_offset;
+ meta_entry->data_block = cur_data_block;
+ meta->entries++;
+ offset++;
+ }
+
+ TRACE("get_meta_index: meta->offset %d, meta->entries %d\n",
+ meta->offset, meta->entries);
+
+ release_meta_index(inode, meta);
+ }
+
+all_done:
+ *index_block = cur_index_block;
+ *index_offset = cur_offset;
+ *data_block = cur_data_block;
+
+ /*
+ * Scale cache index (cache slot entry) to index
+ */
+ return offset * SQUASHFS_META_INDEXES * skip;
+
+failed:
+ release_meta_index(inode, meta);
+ return err;
+}
+
+
+/*
+ * Get the on-disk location and compressed size of the datablock
+ * specified by index. Fill_meta_index() does most of the work.
+ */
+static int read_blocklist(struct inode *inode, int index, u64 *block)
+{
+ u64 start;
+ long long blks;
+ int offset;
+ __le32 size;
+ int res = fill_meta_index(inode, index, &start, &offset, block);
+
+ TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset"
+ " 0x%x, block 0x%llx\n", res, index, start, offset,
+ *block);
+
+ if (res < 0)
+ return res;
+
+ /*
+ * res contains the index of the mapping returned by fill_meta_index(),
+ * this will likely be less than the desired index (because the
+ * meta_index cache works at a higher granularity). Read any
+ * extra block indexes needed.
+ */
+ if (res < index) {
+ blks = read_indexes(inode->i_sb, index - res, &start, &offset);
+ if (blks < 0)
+ return (int) blks;
+ *block += blks;
+ }
+
+ /*
+ * Read length of block specified by index.
+ */
+ res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset,
+ sizeof(size));
+ if (res < 0)
+ return res;
+ return le32_to_cpu(size);
+}
+
+
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+ int bytes, i, offset = 0, sparse = 0;
+ struct squashfs_cache_entry *buffer = NULL;
+ void *pageaddr;
+
+ int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+ int start_index = page->index & ~mask;
+ int end_index = start_index | mask;
+ int file_end = i_size_read(inode) >> msblk->block_log;
+
+ TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+ page->index, squashfs_i(inode)->start);
+
+ if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT))
+ goto out;
+
+ if (index < file_end || squashfs_i(inode)->fragment_block ==
+ SQUASHFS_INVALID_BLK) {
+ /*
+ * Reading a datablock from disk. Need to read block list
+ * to get location and block size.
+ */
+ u64 block = 0;
+ int bsize = read_blocklist(inode, index, &block);
+ if (bsize < 0)
+ goto error_out;
+
+ if (bsize == 0) { /* hole */
+ bytes = index == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+ sparse = 1;
+ } else {
+ /*
+ * Read and decompress datablock.
+ */
+ buffer = squashfs_get_datablock(inode->i_sb,
+ block, bsize);
+ if (buffer->error) {
+ ERROR("Unable to read page, block %llx, size %x"
+ "\n", block, bsize);
+ squashfs_cache_put(buffer);
+ goto error_out;
+ }
+ bytes = buffer->length;
+ }
+ } else {
+ /*
+ * Datablock is stored inside a fragment (tail-end packed
+ * block).
+ */
+ buffer = squashfs_get_fragment(inode->i_sb,
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+
+ if (buffer->error) {
+ ERROR("Unable to read page, block %llx, size %x\n",
+ squashfs_i(inode)->fragment_block,
+ squashfs_i(inode)->fragment_size);
+ squashfs_cache_put(buffer);
+ goto error_out;
+ }
+ bytes = i_size_read(inode) & (msblk->block_size - 1);
+ offset = squashfs_i(inode)->fragment_offset;
+ }
+
+ /*
+ * Loop copying datablock into pages. As the datablock likely covers
+ * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
+ * grab the pages from the page cache, except for the page that we've
+ * been called to fill.
+ */
+ for (i = start_index; i <= end_index && bytes > 0; i++,
+ bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+ struct page *push_page;
+ int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE);
+
+ TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
+
+ push_page = (i == page->index) ? page :
+ grab_cache_page_nowait(page->mapping, i);
+
+ if (!push_page)
+ continue;
+
+ if (PageUptodate(push_page))
+ goto skip_page;
+
+ pageaddr = kmap_atomic(push_page, KM_USER0);
+ squashfs_copy_data(pageaddr, buffer, offset, avail);
+ memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+ kunmap_atomic(pageaddr, KM_USER0);
+ flush_dcache_page(push_page);
+ SetPageUptodate(push_page);
+skip_page:
+ unlock_page(push_page);
+ if (i != page->index)
+ page_cache_release(push_page);
+ }
+
+ if (!sparse)
+ squashfs_cache_put(buffer);
+
+ return 0;
+
+error_out:
+ SetPageError(page);
+out:
+ pageaddr = kmap_atomic(page, KM_USER0);
+ memset(pageaddr, 0, PAGE_CACHE_SIZE);
+ kunmap_atomic(pageaddr, KM_USER0);
+ flush_dcache_page(page);
+ if (!PageError(page))
+ SetPageUptodate(page);
+ unlock_page(page);
+
+ return 0;
+}
+
+
+const struct address_space_operations squashfs_aops = {
+ .readpage = squashfs_readpage
+};
diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c
new file mode 100644
index 000000000000..b5a2c15bbbc7
--- /dev/null
+++ b/fs/squashfs/fragment.c
@@ -0,0 +1,98 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * fragment.c
+ */
+
+/*
+ * This file implements code to handle compressed fragments (tail-end packed
+ * datablocks).
+ *
+ * Regular files contain a fragment index which is mapped to a fragment
+ * location on disk and compressed size using a fragment lookup table.
+ * Like everything in Squashfs this fragment lookup table is itself stored
+ * compressed into metadata blocks. A second index table is used to locate
+ * these. This second index table for speed of access (and because it
+ * is small) is read at mount time and cached in memory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Look-up fragment using the fragment index table. Return the on disk
+ * location of the fragment and its compressed size
+ */
+int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment,
+ u64 *fragment_block)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int block = SQUASHFS_FRAGMENT_INDEX(fragment);
+ int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment);
+ u64 start_block = le64_to_cpu(msblk->fragment_index[block]);
+ struct squashfs_fragment_entry fragment_entry;
+ int size;
+
+ size = squashfs_read_metadata(sb, &fragment_entry, &start_block,
+ &offset, sizeof(fragment_entry));
+ if (size < 0)
+ return size;
+
+ *fragment_block = le64_to_cpu(fragment_entry.start_block);
+ size = le32_to_cpu(fragment_entry.size);
+
+ return size;
+}
+
+
+/*
+ * Read the uncompressed fragment lookup table indexes off disk into memory
+ */
+__le64 *squashfs_read_fragment_index_table(struct super_block *sb,
+ u64 fragment_table_start, unsigned int fragments)
+{
+ unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(fragments);
+ __le64 *fragment_index;
+ int err;
+
+ /* Allocate fragment lookup table indexes */
+ fragment_index = kmalloc(length, GFP_KERNEL);
+ if (fragment_index == NULL) {
+ ERROR("Failed to allocate fragment index table\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ err = squashfs_read_table(sb, fragment_index, fragment_table_start,
+ length);
+ if (err < 0) {
+ ERROR("unable to read fragment index table\n");
+ kfree(fragment_index);
+ return ERR_PTR(err);
+ }
+
+ return fragment_index;
+}
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
new file mode 100644
index 000000000000..3795b837ba28
--- /dev/null
+++ b/fs/squashfs/id.c
@@ -0,0 +1,94 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * id.c
+ */
+
+/*
+ * This file implements code to handle uids and gids.
+ *
+ * For space efficiency regular files store uid and gid indexes, which are
+ * converted to 32-bit uids/gids using an id look up table. This table is
+ * stored compressed into metadata blocks. A second index table is used to
+ * locate these. This second index table for speed of access (and because it
+ * is small) is read at mount time and cached in memory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Map uid/gid index into real 32-bit uid/gid using the id look up table
+ */
+int squashfs_get_id(struct super_block *sb, unsigned int index,
+ unsigned int *id)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int block = SQUASHFS_ID_BLOCK(index);
+ int offset = SQUASHFS_ID_BLOCK_OFFSET(index);
+ u64 start_block = le64_to_cpu(msblk->id_table[block]);
+ __le32 disk_id;
+ int err;
+
+ err = squashfs_read_metadata(sb, &disk_id, &start_block, &offset,
+ sizeof(disk_id));
+ if (err < 0)
+ return err;
+
+ *id = le32_to_cpu(disk_id);
+ return 0;
+}
+
+
+/*
+ * Read uncompressed id lookup table indexes from disk into memory
+ */
+__le64 *squashfs_read_id_index_table(struct super_block *sb,
+ u64 id_table_start, unsigned short no_ids)
+{
+ unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids);
+ __le64 *id_table;
+ int err;
+
+ TRACE("In read_id_index_table, length %d\n", length);
+
+ /* Allocate id lookup table indexes */
+ id_table = kmalloc(length, GFP_KERNEL);
+ if (id_table == NULL) {
+ ERROR("Failed to allocate id index table\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ err = squashfs_read_table(sb, id_table, id_table_start, length);
+ if (err < 0) {
+ ERROR("unable to read id index table\n");
+ kfree(id_table);
+ return ERR_PTR(err);
+ }
+
+ return id_table;
+}
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
new file mode 100644
index 000000000000..7a63398bb855
--- /dev/null
+++ b/fs/squashfs/inode.c
@@ -0,0 +1,346 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * inode.c
+ */
+
+/*
+ * This file implements code to create and read inodes from disk.
+ *
+ * Inodes in Squashfs are identified by a 48-bit inode which encodes the
+ * location of the compressed metadata block containing the inode, and the byte
+ * offset into that block where the inode is placed (<block, offset>).
+ *
+ * To maximise compression there are different inodes for each file type
+ * (regular file, directory, device, etc.), the inode contents and length
+ * varying with the type.
+ *
+ * To further maximise compression, two types of regular file inode and
+ * directory inode are defined: inodes optimised for frequently occurring
+ * regular files and directories, and extended types where extra
+ * information has to be stored.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Initialise VFS inode with the base inode information common to all
+ * Squashfs inode types. Sqsh_ino contains the unswapped base inode
+ * off disk.
+ */
+static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
+ struct squashfs_base_inode *sqsh_ino)
+{
+ int err;
+
+ err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &inode->i_uid);
+ if (err)
+ return err;
+
+ err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &inode->i_gid);
+ if (err)
+ return err;
+
+ inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
+ inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime);
+ inode->i_atime.tv_sec = inode->i_mtime.tv_sec;
+ inode->i_ctime.tv_sec = inode->i_mtime.tv_sec;
+ inode->i_mode = le16_to_cpu(sqsh_ino->mode);
+ inode->i_size = 0;
+
+ return err;
+}
+
+
+struct inode *squashfs_iget(struct super_block *sb, long long ino,
+ unsigned int ino_number)
+{
+ struct inode *inode = iget_locked(sb, ino_number);
+ int err;
+
+ TRACE("Entered squashfs_iget\n");
+
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+ if (!(inode->i_state & I_NEW))
+ return inode;
+
+ err = squashfs_read_inode(inode, ino);
+ if (err) {
+ iget_failed(inode);
+ return ERR_PTR(err);
+ }
+
+ unlock_new_inode(inode);
+ return inode;
+}
+
+
+/*
+ * Initialise VFS inode by reading inode from inode table (compressed
+ * metadata). The format and amount of data read depends on type.
+ */
+int squashfs_read_inode(struct inode *inode, long long ino)
+{
+ struct super_block *sb = inode->i_sb;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ u64 block = SQUASHFS_INODE_BLK(ino) + msblk->inode_table;
+ int err, type, offset = SQUASHFS_INODE_OFFSET(ino);
+ union squashfs_inode squashfs_ino;
+ struct squashfs_base_inode *sqshb_ino = &squashfs_ino.base;
+
+ TRACE("Entered squashfs_read_inode\n");
+
+ /*
+ * Read inode base common to all inode types.
+ */
+ err = squashfs_read_metadata(sb, sqshb_ino, &block,
+ &offset, sizeof(*sqshb_ino));
+ if (err < 0)
+ goto failed_read;
+
+ err = squashfs_new_inode(sb, inode, sqshb_ino);
+ if (err)
+ goto failed_read;
+
+ block = SQUASHFS_INODE_BLK(ino) + msblk->inode_table;
+ offset = SQUASHFS_INODE_OFFSET(ino);
+
+ type = le16_to_cpu(sqshb_ino->inode_type);
+ switch (type) {
+ case SQUASHFS_REG_TYPE: {
+ unsigned int frag_offset, frag_size, frag;
+ u64 frag_blk;
+ struct squashfs_reg_inode *sqsh_ino = &squashfs_ino.reg;
+
+ err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset,
+ sizeof(*sqsh_ino));
+ if (err < 0)
+ goto failed_read;
+
+ frag = le32_to_cpu(sqsh_ino->fragment);
+ if (frag != SQUASHFS_INVALID_FRAG) {
+ frag_offset = le32_to_cpu(sqsh_ino->offset);
+ frag_size = squashfs_frag_lookup(sb, frag, &frag_blk);
+ if (frag_size < 0) {
+ err = frag_size;
+ goto failed_read;
+ }
+ } else {
+ frag_blk = SQUASHFS_INVALID_BLK;
+ frag_size = 0;
+ frag_offset = 0;
+ }
+
+ inode->i_nlink = 1;
+ inode->i_size = le32_to_cpu(sqsh_ino->file_size);
+ inode->i_fop = &generic_ro_fops;
+ inode->i_mode |= S_IFREG;
+ inode->i_blocks = ((inode->i_size - 1) >> 9) + 1;
+ squashfs_i(inode)->fragment_block = frag_blk;
+ squashfs_i(inode)->fragment_size = frag_size;
+ squashfs_i(inode)->fragment_offset = frag_offset;
+ squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block);
+ squashfs_i(inode)->block_list_start = block;
+ squashfs_i(inode)->offset = offset;
+ inode->i_data.a_ops = &squashfs_aops;
+
+ TRACE("File inode %x:%x, start_block %llx, block_list_start "
+ "%llx, offset %x\n", SQUASHFS_INODE_BLK(ino),
+ offset, squashfs_i(inode)->start, block, offset);
+ break;
+ }
+ case SQUASHFS_LREG_TYPE: {
+ unsigned int frag_offset, frag_size, frag;
+ u64 frag_blk;
+ struct squashfs_lreg_inode *sqsh_ino = &squashfs_ino.lreg;
+
+ err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset,
+ sizeof(*sqsh_ino));
+ if (err < 0)
+ goto failed_read;
+
+ frag = le32_to_cpu(sqsh_ino->fragment);
+ if (frag != SQUASHFS_INVALID_FRAG) {
+ frag_offset = le32_to_cpu(sqsh_ino->offset);
+ frag_size = squashfs_frag_lookup(sb, frag, &frag_blk);
+ if (frag_size < 0) {
+ err = frag_size;
+ goto failed_read;
+ }
+ } else {
+ frag_blk = SQUASHFS_INVALID_BLK;
+ frag_size = 0;
+ frag_offset = 0;
+ }
+
+ inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ inode->i_size = le64_to_cpu(sqsh_ino->file_size);
+ inode->i_fop = &generic_ro_fops;
+ inode->i_mode |= S_IFREG;
+ inode->i_blocks = ((inode->i_size -
+ le64_to_cpu(sqsh_ino->sparse) - 1) >> 9) + 1;
+
+ squashfs_i(inode)->fragment_block = frag_blk;
+ squashfs_i(inode)->fragment_size = frag_size;
+ squashfs_i(inode)->fragment_offset = frag_offset;
+ squashfs_i(inode)->start = le64_to_cpu(sqsh_ino->start_block);
+ squashfs_i(inode)->block_list_start = block;
+ squashfs_i(inode)->offset = offset;
+ inode->i_data.a_ops = &squashfs_aops;
+
+ TRACE("File inode %x:%x, start_block %llx, block_list_start "
+ "%llx, offset %x\n", SQUASHFS_INODE_BLK(ino),
+ offset, squashfs_i(inode)->start, block, offset);
+ break;
+ }
+ case SQUASHFS_DIR_TYPE: {
+ struct squashfs_dir_inode *sqsh_ino = &squashfs_ino.dir;
+
+ err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset,
+ sizeof(*sqsh_ino));
+ if (err < 0)
+ goto failed_read;
+
+ inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ inode->i_size = le16_to_cpu(sqsh_ino->file_size);
+ inode->i_op = &squashfs_dir_inode_ops;
+ inode->i_fop = &squashfs_dir_ops;
+ inode->i_mode |= S_IFDIR;
+ squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block);
+ squashfs_i(inode)->offset = le16_to_cpu(sqsh_ino->offset);
+ squashfs_i(inode)->dir_idx_cnt = 0;
+ squashfs_i(inode)->parent = le32_to_cpu(sqsh_ino->parent_inode);
+
+ TRACE("Directory inode %x:%x, start_block %llx, offset %x\n",
+ SQUASHFS_INODE_BLK(ino), offset,
+ squashfs_i(inode)->start,
+ le16_to_cpu(sqsh_ino->offset));
+ break;
+ }
+ case SQUASHFS_LDIR_TYPE: {
+ struct squashfs_ldir_inode *sqsh_ino = &squashfs_ino.ldir;
+
+ err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset,
+ sizeof(*sqsh_ino));
+ if (err < 0)
+ goto failed_read;
+
+ inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ inode->i_size = le32_to_cpu(sqsh_ino->file_size);
+ inode->i_op = &squashfs_dir_inode_ops;
+ inode->i_fop = &squashfs_dir_ops;
+ inode->i_mode |= S_IFDIR;
+ squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block);
+ squashfs_i(inode)->offset = le16_to_cpu(sqsh_ino->offset);
+ squashfs_i(inode)->dir_idx_start = block;
+ squashfs_i(inode)->dir_idx_offset = offset;
+ squashfs_i(inode)->dir_idx_cnt = le16_to_cpu(sqsh_ino->i_count);
+ squashfs_i(inode)->parent = le32_to_cpu(sqsh_ino->parent_inode);
+
+ TRACE("Long directory inode %x:%x, start_block %llx, offset "
+ "%x\n", SQUASHFS_INODE_BLK(ino), offset,
+ squashfs_i(inode)->start,
+ le16_to_cpu(sqsh_ino->offset));
+ break;
+ }
+ case SQUASHFS_SYMLINK_TYPE:
+ case SQUASHFS_LSYMLINK_TYPE: {
+ struct squashfs_symlink_inode *sqsh_ino = &squashfs_ino.symlink;
+
+ err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset,
+ sizeof(*sqsh_ino));
+ if (err < 0)
+ goto failed_read;
+
+ inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
+ inode->i_op = &page_symlink_inode_operations;
+ inode->i_data.a_ops = &squashfs_symlink_aops;
+ inode->i_mode |= S_IFLNK;
+ squashfs_i(inode)->start = block;
+ squashfs_i(inode)->offset = offset;
+
+ TRACE("Symbolic link inode %x:%x, start_block %llx, offset "
+ "%x\n", SQUASHFS_INODE_BLK(ino), offset,
+ block, offset);
+ break;
+ }
+ case SQUASHFS_BLKDEV_TYPE:
+ case SQUASHFS_CHRDEV_TYPE:
+ case SQUASHFS_LBLKDEV_TYPE:
+ case SQUASHFS_LCHRDEV_TYPE: {
+ struct squashfs_dev_inode *sqsh_ino = &squashfs_ino.dev;
+ unsigned int rdev;
+
+ err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset,
+ sizeof(*sqsh_ino));
+ if (err < 0)
+ goto failed_read;
+
+ if (type == SQUASHFS_CHRDEV_TYPE)
+ inode->i_mode |= S_IFCHR;
+ else
+ inode->i_mode |= S_IFBLK;
+ inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ rdev = le32_to_cpu(sqsh_ino->rdev);
+ init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+
+ TRACE("Device inode %x:%x, rdev %x\n",
+ SQUASHFS_INODE_BLK(ino), offset, rdev);
+ break;
+ }
+ case SQUASHFS_FIFO_TYPE:
+ case SQUASHFS_SOCKET_TYPE:
+ case SQUASHFS_LFIFO_TYPE:
+ case SQUASHFS_LSOCKET_TYPE: {
+ struct squashfs_ipc_inode *sqsh_ino = &squashfs_ino.ipc;
+
+ err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset,
+ sizeof(*sqsh_ino));
+ if (err < 0)
+ goto failed_read;
+
+ if (type == SQUASHFS_FIFO_TYPE)
+ inode->i_mode |= S_IFIFO;
+ else
+ inode->i_mode |= S_IFSOCK;
+ inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ init_special_inode(inode, inode->i_mode, 0);
+ break;
+ }
+ default:
+ ERROR("Unknown inode type %d in squashfs_iget!\n", type);
+ return -EINVAL;
+ }
+
+ return 0;
+
+failed_read:
+ ERROR("Unable to read inode 0x%llx\n", ino);
+ return err;
+}
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
new file mode 100644
index 000000000000..9e398653b22b
--- /dev/null
+++ b/fs/squashfs/namei.c
@@ -0,0 +1,242 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * namei.c
+ */
+
+/*
+ * This file implements code to do filename lookup in directories.
+ *
+ * Like inodes, directories are packed into compressed metadata blocks, stored
+ * in a directory table. Directories are accessed using the start address of
+ * the metablock containing the directory and the offset into the
+ * decompressed block (<block, offset>).
+ *
+ * Directories are organised in a slightly complex way, and are not simply
+ * a list of file names. The organisation takes advantage of the
+ * fact that (in most cases) the inodes of the files will be in the same
+ * compressed metadata block, and therefore, can share the start block.
+ * Directories are therefore organised in a two level list, a directory
+ * header containing the shared start block value, and a sequence of directory
+ * entries, each of which share the shared start block. A new directory header
+ * is written once/if the inode start block changes. The directory
+ * header/directory entry list is repeated as many times as necessary.
+ *
+ * Directories are sorted, and can contain a directory index to speed up
+ * file lookup. Directory indexes store one entry per metablock, each entry
+ * storing the index/filename mapping to the first directory header
+ * in each metadata block. Directories are sorted in alphabetical order,
+ * and at lookup the index is scanned linearly looking for the first filename
+ * alphabetically larger than the filename being looked up. At this point the
+ * location of the metadata block the filename is in has been found.
+ * The general idea of the index is ensure only one metadata block needs to be
+ * decompressed to do a lookup irrespective of the length of the directory.
+ * This scheme has the advantage that it doesn't require extra memory overhead
+ * and doesn't require much extra storage on disk.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dcache.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/*
+ * Lookup name in the directory index, returning the location of the metadata
+ * block containing it, and the directory index this represents.
+ *
+ * If we get an error reading the index then return the part of the index
+ * (if any) we have managed to read - the index isn't essential, just
+ * quicker.
+ */
+static int get_dir_index_using_name(struct super_block *sb,
+ u64 *next_block, int *next_offset, u64 index_start,
+ int index_offset, int i_count, const char *name,
+ int len)
+{
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int i, size, length = 0, err;
+ struct squashfs_dir_index *index;
+ char *str;
+
+ TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count);
+
+ index = kmalloc(sizeof(*index) + SQUASHFS_NAME_LEN * 2 + 2, GFP_KERNEL);
+ if (index == NULL) {
+ ERROR("Failed to allocate squashfs_dir_index\n");
+ goto out;
+ }
+
+ str = &index->name[SQUASHFS_NAME_LEN + 1];
+ strncpy(str, name, len);
+ str[len] = '\0';
+
+ for (i = 0; i < i_count; i++) {
+ err = squashfs_read_metadata(sb, index, &index_start,
+ &index_offset, sizeof(*index));
+ if (err < 0)
+ break;
+
+
+ size = le32_to_cpu(index->size) + 1;
+
+ err = squashfs_read_metadata(sb, index->name, &index_start,
+ &index_offset, size);
+ if (err < 0)
+ break;
+
+ index->name[size] = '\0';
+
+ if (strcmp(index->name, str) > 0)
+ break;
+
+ length = le32_to_cpu(index->index);
+ *next_block = le32_to_cpu(index->start_block) +
+ msblk->directory_table;
+ }
+
+ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE;
+ kfree(index);
+
+out:
+ /*
+ * Return index (f_pos) of the looked up metadata block. Translate
+ * from internal f_pos to external f_pos which is offset by 3 because
+ * we invent "." and ".." entries which are not actually stored in the
+ * directory.
+ */
+ return length + 3;
+}
+
+
+static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ const unsigned char *name = dentry->d_name.name;
+ int len = dentry->d_name.len;
+ struct inode *inode = NULL;
+ struct squashfs_sb_info *msblk = dir->i_sb->s_fs_info;
+ struct squashfs_dir_header dirh;
+ struct squashfs_dir_entry *dire;
+ u64 block = squashfs_i(dir)->start + msblk->directory_table;
+ int offset = squashfs_i(dir)->offset;
+ int err, length = 0, dir_count, size;
+
+ TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset);
+
+ dire = kmalloc(sizeof(*dire) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL);
+ if (dire == NULL) {
+ ERROR("Failed to allocate squashfs_dir_entry\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (len > SQUASHFS_NAME_LEN) {
+ err = -ENAMETOOLONG;
+ goto failed;
+ }
+
+ length = get_dir_index_using_name(dir->i_sb, &block, &offset,
+ squashfs_i(dir)->dir_idx_start,
+ squashfs_i(dir)->dir_idx_offset,
+ squashfs_i(dir)->dir_idx_cnt, name, len);
+
+ while (length < i_size_read(dir)) {
+ /*
+ * Read directory header.
+ */
+ err = squashfs_read_metadata(dir->i_sb, &dirh, &block,
+ &offset, sizeof(dirh));
+ if (err < 0)
+ goto read_failure;
+
+ length += sizeof(dirh);
+
+ dir_count = le32_to_cpu(dirh.count) + 1;
+ while (dir_count--) {
+ /*
+ * Read directory entry.
+ */
+ err = squashfs_read_metadata(dir->i_sb, dire, &block,
+ &offset, sizeof(*dire));
+ if (err < 0)
+ goto read_failure;
+
+ size = le16_to_cpu(dire->size) + 1;
+
+ err = squashfs_read_metadata(dir->i_sb, dire->name,
+ &block, &offset, size);
+ if (err < 0)
+ goto read_failure;
+
+ length += sizeof(*dire) + size;
+
+ if (name[0] < dire->name[0])
+ goto exit_lookup;
+
+ if (len == size && !strncmp(name, dire->name, len)) {
+ unsigned int blk, off, ino_num;
+ long long ino;
+ blk = le32_to_cpu(dirh.start_block);
+ off = le16_to_cpu(dire->offset);
+ ino_num = le32_to_cpu(dirh.inode_number) +
+ (short) le16_to_cpu(dire->inode_number);
+ ino = SQUASHFS_MKINODE(blk, off);
+
+ TRACE("calling squashfs_iget for directory "
+ "entry %s, inode %x:%x, %d\n", name,
+ blk, off, ino_num);
+
+ inode = squashfs_iget(dir->i_sb, ino, ino_num);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ goto failed;
+ }
+
+ goto exit_lookup;
+ }
+ }
+ }
+
+exit_lookup:
+ kfree(dire);
+ if (inode)
+ return d_splice_alias(inode, dentry);
+ d_add(dentry, inode);
+ return ERR_PTR(0);
+
+read_failure:
+ ERROR("Unable to read directory block [%llx:%x]\n",
+ squashfs_i(dir)->start + msblk->directory_table,
+ squashfs_i(dir)->offset);
+failed:
+ kfree(dire);
+ return ERR_PTR(err);
+}
+
+
+const struct inode_operations squashfs_dir_inode_ops = {
+ .lookup = squashfs_lookup
+};
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
new file mode 100644
index 000000000000..6b2515d027d5
--- /dev/null
+++ b/fs/squashfs/squashfs.h
@@ -0,0 +1,90 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * squashfs.h
+ */
+
+#define TRACE(s, args...) pr_debug("SQUASHFS: "s, ## args)
+
+#define ERROR(s, args...) pr_err("SQUASHFS error: "s, ## args)
+
+#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args)
+
+static inline struct squashfs_inode_info *squashfs_i(struct inode *inode)
+{
+ return list_entry(inode, struct squashfs_inode_info, vfs_inode);
+}
+
+/* block.c */
+extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
+ int);
+
+/* cache.c */
+extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
+extern void squashfs_cache_delete(struct squashfs_cache *);
+extern struct squashfs_cache_entry *squashfs_cache_get(struct super_block *,
+ struct squashfs_cache *, u64, int);
+extern void squashfs_cache_put(struct squashfs_cache_entry *);
+extern int squashfs_copy_data(void *, struct squashfs_cache_entry *, int, int);
+extern int squashfs_read_metadata(struct super_block *, void *, u64 *,
+ int *, int);
+extern struct squashfs_cache_entry *squashfs_get_fragment(struct super_block *,
+ u64, int);
+extern struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *,
+ u64, int);
+extern int squashfs_read_table(struct super_block *, void *, u64, int);
+
+/* export.c */
+extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64,
+ unsigned int);
+
+/* fragment.c */
+extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *);
+extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
+ u64, unsigned int);
+
+/* id.c */
+extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
+extern __le64 *squashfs_read_id_index_table(struct super_block *, u64,
+ unsigned short);
+
+/* inode.c */
+extern struct inode *squashfs_iget(struct super_block *, long long,
+ unsigned int);
+extern int squashfs_read_inode(struct inode *, long long);
+
+/*
+ * Inodes and files operations
+ */
+
+/* dir.c */
+extern const struct file_operations squashfs_dir_ops;
+
+/* export.c */
+extern const struct export_operations squashfs_export_ops;
+
+/* file.c */
+extern const struct address_space_operations squashfs_aops;
+
+/* namei.c */
+extern const struct inode_operations squashfs_dir_inode_ops;
+
+/* symlink.c */
+extern const struct address_space_operations squashfs_symlink_aops;
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
new file mode 100644
index 000000000000..6840da1bf21e
--- /dev/null
+++ b/fs/squashfs/squashfs_fs.h
@@ -0,0 +1,381 @@
+#ifndef SQUASHFS_FS
+#define SQUASHFS_FS
+/*
+ * Squashfs
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * squashfs_fs.h
+ */
+
+#define SQUASHFS_CACHED_FRAGMENTS CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE
+#define SQUASHFS_MAJOR 4
+#define SQUASHFS_MINOR 0
+#define SQUASHFS_MAGIC 0x73717368
+#define SQUASHFS_START 0
+
+/* size of metadata (inode and directory) blocks */
+#define SQUASHFS_METADATA_SIZE 8192
+#define SQUASHFS_METADATA_LOG 13
+
+/* default size of data blocks */
+#define SQUASHFS_FILE_SIZE 131072
+#define SQUASHFS_FILE_LOG 17
+
+#define SQUASHFS_FILE_MAX_SIZE 1048576
+#define SQUASHFS_FILE_MAX_LOG 20
+
+/* Max number of uids and gids */
+#define SQUASHFS_IDS 65536
+
+/* Max length of filename (not 255) */
+#define SQUASHFS_NAME_LEN 256
+
+#define SQUASHFS_INVALID_FRAG (0xffffffffU)
+#define SQUASHFS_INVALID_BLK (-1LL)
+
+/* Filesystem flags */
+#define SQUASHFS_NOI 0
+#define SQUASHFS_NOD 1
+#define SQUASHFS_NOF 3
+#define SQUASHFS_NO_FRAG 4
+#define SQUASHFS_ALWAYS_FRAG 5
+#define SQUASHFS_DUPLICATE 6
+#define SQUASHFS_EXPORT 7
+
+#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1)
+
+#define SQUASHFS_UNCOMPRESSED_INODES(flags) SQUASHFS_BIT(flags, \
+ SQUASHFS_NOI)
+
+#define SQUASHFS_UNCOMPRESSED_DATA(flags) SQUASHFS_BIT(flags, \
+ SQUASHFS_NOD)
+
+#define SQUASHFS_UNCOMPRESSED_FRAGMENTS(flags) SQUASHFS_BIT(flags, \
+ SQUASHFS_NOF)
+
+#define SQUASHFS_NO_FRAGMENTS(flags) SQUASHFS_BIT(flags, \
+ SQUASHFS_NO_FRAG)
+
+#define SQUASHFS_ALWAYS_FRAGMENTS(flags) SQUASHFS_BIT(flags, \
+ SQUASHFS_ALWAYS_FRAG)
+
+#define SQUASHFS_DUPLICATES(flags) SQUASHFS_BIT(flags, \
+ SQUASHFS_DUPLICATE)
+
+#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \
+ SQUASHFS_EXPORT)
+
+/* Max number of types and file types */
+#define SQUASHFS_DIR_TYPE 1
+#define SQUASHFS_REG_TYPE 2
+#define SQUASHFS_SYMLINK_TYPE 3
+#define SQUASHFS_BLKDEV_TYPE 4
+#define SQUASHFS_CHRDEV_TYPE 5
+#define SQUASHFS_FIFO_TYPE 6
+#define SQUASHFS_SOCKET_TYPE 7
+#define SQUASHFS_LDIR_TYPE 8
+#define SQUASHFS_LREG_TYPE 9
+#define SQUASHFS_LSYMLINK_TYPE 10
+#define SQUASHFS_LBLKDEV_TYPE 11
+#define SQUASHFS_LCHRDEV_TYPE 12
+#define SQUASHFS_LFIFO_TYPE 13
+#define SQUASHFS_LSOCKET_TYPE 14
+
+/* Flag whether block is compressed or uncompressed, bit is set if block is
+ * uncompressed */
+#define SQUASHFS_COMPRESSED_BIT (1 << 15)
+
+#define SQUASHFS_COMPRESSED_SIZE(B) (((B) & ~SQUASHFS_COMPRESSED_BIT) ? \
+ (B) & ~SQUASHFS_COMPRESSED_BIT : SQUASHFS_COMPRESSED_BIT)
+
+#define SQUASHFS_COMPRESSED(B) (!((B) & SQUASHFS_COMPRESSED_BIT))
+
+#define SQUASHFS_COMPRESSED_BIT_BLOCK (1 << 24)
+
+#define SQUASHFS_COMPRESSED_SIZE_BLOCK(B) ((B) & \
+ ~SQUASHFS_COMPRESSED_BIT_BLOCK)
+
+#define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK))
+
+/*
+ * Inode number ops. Inodes consist of a compressed block number, and an
+ * uncompressed offset within that block
+ */
+#define SQUASHFS_INODE_BLK(A) ((unsigned int) ((A) >> 16))
+
+#define SQUASHFS_INODE_OFFSET(A) ((unsigned int) ((A) & 0xffff))
+
+#define SQUASHFS_MKINODE(A, B) ((long long)(((long long) (A)\
+ << 16) + (B)))
+
+/* Translate between VFS mode and squashfs mode */
+#define SQUASHFS_MODE(A) ((A) & 0xfff)
+
+/* fragment and fragment table defines */
+#define SQUASHFS_FRAGMENT_BYTES(A) \
+ ((A) * sizeof(struct squashfs_fragment_entry))
+
+#define SQUASHFS_FRAGMENT_INDEX(A) (SQUASHFS_FRAGMENT_BYTES(A) / \
+ SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_FRAGMENT_INDEX_OFFSET(A) (SQUASHFS_FRAGMENT_BYTES(A) % \
+ SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_FRAGMENT_INDEXES(A) ((SQUASHFS_FRAGMENT_BYTES(A) + \
+ SQUASHFS_METADATA_SIZE - 1) / \
+ SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_FRAGMENT_INDEX_BYTES(A) (SQUASHFS_FRAGMENT_INDEXES(A) *\
+ sizeof(u64))
+
+/* inode lookup table defines */
+#define SQUASHFS_LOOKUP_BYTES(A) ((A) * sizeof(u64))
+
+#define SQUASHFS_LOOKUP_BLOCK(A) (SQUASHFS_LOOKUP_BYTES(A) / \
+ SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_LOOKUP_BLOCK_OFFSET(A) (SQUASHFS_LOOKUP_BYTES(A) % \
+ SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_LOOKUP_BLOCKS(A) ((SQUASHFS_LOOKUP_BYTES(A) + \
+ SQUASHFS_METADATA_SIZE - 1) / \
+ SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_LOOKUP_BLOCK_BYTES(A) (SQUASHFS_LOOKUP_BLOCKS(A) *\
+ sizeof(u64))
+
+/* uid/gid lookup table defines */
+#define SQUASHFS_ID_BYTES(A) ((A) * sizeof(unsigned int))
+
+#define SQUASHFS_ID_BLOCK(A) (SQUASHFS_ID_BYTES(A) / \
+ SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_ID_BLOCK_OFFSET(A) (SQUASHFS_ID_BYTES(A) % \
+ SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_ID_BLOCKS(A) ((SQUASHFS_ID_BYTES(A) + \
+ SQUASHFS_METADATA_SIZE - 1) / \
+ SQUASHFS_METADATA_SIZE)
+
+#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\
+ sizeof(u64))
+
+/* cached data constants for filesystem */
+#define SQUASHFS_CACHED_BLKS 8
+
+#define SQUASHFS_MAX_FILE_SIZE_LOG 64
+
+#define SQUASHFS_MAX_FILE_SIZE (1LL << \
+ (SQUASHFS_MAX_FILE_SIZE_LOG - 2))
+
+#define SQUASHFS_MARKER_BYTE 0xff
+
+/* meta index cache */
+#define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int))
+#define SQUASHFS_META_ENTRIES 127
+#define SQUASHFS_META_SLOTS 8
+
+struct meta_entry {
+ u64 data_block;
+ unsigned int index_block;
+ unsigned short offset;
+ unsigned short pad;
+};
+
+struct meta_index {
+ unsigned int inode_number;
+ unsigned int offset;
+ unsigned short entries;
+ unsigned short skip;
+ unsigned short locked;
+ unsigned short pad;
+ struct meta_entry meta_entry[SQUASHFS_META_ENTRIES];
+};
+
+
+/*
+ * definitions for structures on disk
+ */
+#define ZLIB_COMPRESSION 1
+
+struct squashfs_super_block {
+ __le32 s_magic;
+ __le32 inodes;
+ __le32 mkfs_time;
+ __le32 block_size;
+ __le32 fragments;
+ __le16 compression;
+ __le16 block_log;
+ __le16 flags;
+ __le16 no_ids;
+ __le16 s_major;
+ __le16 s_minor;
+ __le64 root_inode;
+ __le64 bytes_used;
+ __le64 id_table_start;
+ __le64 xattr_table_start;
+ __le64 inode_table_start;
+ __le64 directory_table_start;
+ __le64 fragment_table_start;
+ __le64 lookup_table_start;
+};
+
+struct squashfs_dir_index {
+ __le32 index;
+ __le32 start_block;
+ __le32 size;
+ unsigned char name[0];
+};
+
+struct squashfs_base_inode {
+ __le16 inode_type;
+ __le16 mode;
+ __le16 uid;
+ __le16 guid;
+ __le32 mtime;
+ __le32 inode_number;
+};
+
+struct squashfs_ipc_inode {
+ __le16 inode_type;
+ __le16 mode;
+ __le16 uid;
+ __le16 guid;
+ __le32 mtime;
+ __le32 inode_number;
+ __le32 nlink;
+};
+
+struct squashfs_dev_inode {
+ __le16 inode_type;
+ __le16 mode;
+ __le16 uid;
+ __le16 guid;
+ __le32 mtime;
+ __le32 inode_number;
+ __le32 nlink;
+ __le32 rdev;
+};
+
+struct squashfs_symlink_inode {
+ __le16 inode_type;
+ __le16 mode;
+ __le16 uid;
+ __le16 guid;
+ __le32 mtime;
+ __le32 inode_number;
+ __le32 nlink;
+ __le32 symlink_size;
+ char symlink[0];
+};
+
+struct squashfs_reg_inode {
+ __le16 inode_type;
+ __le16 mode;
+ __le16 uid;
+ __le16 guid;
+ __le32 mtime;
+ __le32 inode_number;
+ __le32 start_block;
+ __le32 fragment;
+ __le32 offset;
+ __le32 file_size;
+ __le16 block_list[0];
+};
+
+struct squashfs_lreg_inode {
+ __le16 inode_type;
+ __le16 mode;
+ __le16 uid;
+ __le16 guid;
+ __le32 mtime;
+ __le32 inode_number;
+ __le64 start_block;
+ __le64 file_size;
+ __le64 sparse;
+ __le32 nlink;
+ __le32 fragment;
+ __le32 offset;
+ __le32 xattr;
+ __le16 block_list[0];
+};
+
+struct squashfs_dir_inode {
+ __le16 inode_type;
+ __le16 mode;
+ __le16 uid;
+ __le16 guid;
+ __le32 mtime;
+ __le32 inode_number;
+ __le32 start_block;
+ __le32 nlink;
+ __le16 file_size;
+ __le16 offset;
+ __le32 parent_inode;
+};
+
+struct squashfs_ldir_inode {
+ __le16 inode_type;
+ __le16 mode;
+ __le16 uid;
+ __le16 guid;
+ __le32 mtime;
+ __le32 inode_number;
+ __le32 nlink;
+ __le32 file_size;
+ __le32 start_block;
+ __le32 parent_inode;
+ __le16 i_count;
+ __le16 offset;
+ __le32 xattr;
+ struct squashfs_dir_index index[0];
+};
+
+union squashfs_inode {
+ struct squashfs_base_inode base;
+ struct squashfs_dev_inode dev;
+ struct squashfs_symlink_inode symlink;
+ struct squashfs_reg_inode reg;
+ struct squashfs_lreg_inode lreg;
+ struct squashfs_dir_inode dir;
+ struct squashfs_ldir_inode ldir;
+ struct squashfs_ipc_inode ipc;
+};
+
+struct squashfs_dir_entry {
+ __le16 offset;
+ __le16 inode_number;
+ __le16 type;
+ __le16 size;
+ char name[0];
+};
+
+struct squashfs_dir_header {
+ __le32 count;
+ __le32 start_block;
+ __le32 inode_number;
+};
+
+struct squashfs_fragment_entry {
+ __le64 start_block;
+ __le32 size;
+ unsigned int unused;
+};
+
+#endif
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h
new file mode 100644
index 000000000000..fbfca30c0c68
--- /dev/null
+++ b/fs/squashfs/squashfs_fs_i.h
@@ -0,0 +1,45 @@
+#ifndef SQUASHFS_FS_I
+#define SQUASHFS_FS_I
+/*
+ * Squashfs
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * squashfs_fs_i.h
+ */
+
+struct squashfs_inode_info {
+ u64 start;
+ int offset;
+ union {
+ struct {
+ u64 fragment_block;
+ int fragment_size;
+ int fragment_offset;
+ u64 block_list_start;
+ };
+ struct {
+ u64 dir_idx_start;
+ int dir_idx_offset;
+ int dir_idx_cnt;
+ int parent;
+ };
+ };
+ struct inode vfs_inode;
+};
+#endif
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
new file mode 100644
index 000000000000..c8c65614dd1c
--- /dev/null
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -0,0 +1,76 @@
+#ifndef SQUASHFS_FS_SB
+#define SQUASHFS_FS_SB
+/*
+ * Squashfs
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * squashfs_fs_sb.h
+ */
+
+#include "squashfs_fs.h"
+
+struct squashfs_cache {
+ char *name;
+ int entries;
+ int next_blk;
+ int num_waiters;
+ int unused;
+ int block_size;
+ int pages;
+ spinlock_t lock;
+ wait_queue_head_t wait_queue;
+ struct squashfs_cache_entry *entry;
+};
+
+struct squashfs_cache_entry {
+ u64 block;
+ int length;
+ int refcount;
+ u64 next_index;
+ int pending;
+ int error;
+ int num_waiters;
+ wait_queue_head_t wait_queue;
+ struct squashfs_cache *cache;
+ void **data;
+};
+
+struct squashfs_sb_info {
+ int devblksize;
+ int devblksize_log2;
+ struct squashfs_cache *block_cache;
+ struct squashfs_cache *fragment_cache;
+ struct squashfs_cache *read_page;
+ int next_meta_index;
+ __le64 *id_table;
+ __le64 *fragment_index;
+ unsigned int *fragment_index_2;
+ struct mutex read_data_mutex;
+ struct mutex meta_index_mutex;
+ struct meta_index *meta_index;
+ z_stream stream;
+ __le64 *inode_lookup_table;
+ u64 inode_table;
+ u64 directory_table;
+ unsigned int block_size;
+ unsigned short block_log;
+ long long bytes_used;
+ unsigned int inodes;
+};
+#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
new file mode 100644
index 000000000000..a0466d7467b2
--- /dev/null
+++ b/fs/squashfs/super.c
@@ -0,0 +1,440 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * super.c
+ */
+
+/*
+ * This file implements code to read the superblock, read and initialise
+ * in-memory structures at mount time, and all the VFS glue code to register
+ * the filesystem.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+static struct file_system_type squashfs_fs_type;
+static struct super_operations squashfs_super_ops;
+
+static int supported_squashfs_filesystem(short major, short minor, short comp)
+{
+ if (major < SQUASHFS_MAJOR) {
+ ERROR("Major/Minor mismatch, older Squashfs %d.%d "
+ "filesystems are unsupported\n", major, minor);
+ return -EINVAL;
+ } else if (major > SQUASHFS_MAJOR || minor > SQUASHFS_MINOR) {
+ ERROR("Major/Minor mismatch, trying to mount newer "
+ "%d.%d filesystem\n", major, minor);
+ ERROR("Please update your kernel\n");
+ return -EINVAL;
+ }
+
+ if (comp != ZLIB_COMPRESSION)
+ return -EINVAL;
+
+ return 0;
+}
+
+
+static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+ struct squashfs_sb_info *msblk;
+ struct squashfs_super_block *sblk = NULL;
+ char b[BDEVNAME_SIZE];
+ struct inode *root;
+ long long root_inode;
+ unsigned short flags;
+ unsigned int fragments;
+ u64 lookup_table_start;
+ int err;
+
+ TRACE("Entered squashfs_fill_superblock\n");
+
+ sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL);
+ if (sb->s_fs_info == NULL) {
+ ERROR("Failed to allocate squashfs_sb_info\n");
+ return -ENOMEM;
+ }
+ msblk = sb->s_fs_info;
+
+ msblk->stream.workspace = kmalloc(zlib_inflate_workspacesize(),
+ GFP_KERNEL);
+ if (msblk->stream.workspace == NULL) {
+ ERROR("Failed to allocate zlib workspace\n");
+ goto failure;
+ }
+
+ sblk = kzalloc(sizeof(*sblk), GFP_KERNEL);
+ if (sblk == NULL) {
+ ERROR("Failed to allocate squashfs_super_block\n");
+ goto failure;
+ }
+
+ msblk->devblksize = sb_min_blocksize(sb, BLOCK_SIZE);
+ msblk->devblksize_log2 = ffz(~msblk->devblksize);
+
+ mutex_init(&msblk->read_data_mutex);
+ mutex_init(&msblk->meta_index_mutex);
+
+ /*
+ * msblk->bytes_used is checked in squashfs_read_table to ensure reads
+ * are not beyond filesystem end. But as we're using
+ * squashfs_read_table here to read the superblock (including the value
+ * of bytes_used) we need to set it to an initial sensible dummy value
+ */
+ msblk->bytes_used = sizeof(*sblk);
+ err = squashfs_read_table(sb, sblk, SQUASHFS_START, sizeof(*sblk));
+
+ if (err < 0) {
+ ERROR("unable to read squashfs_super_block\n");
+ goto failed_mount;
+ }
+
+ /* Check it is a SQUASHFS superblock */
+ sb->s_magic = le32_to_cpu(sblk->s_magic);
+ if (sb->s_magic != SQUASHFS_MAGIC) {
+ if (!silent)
+ ERROR("Can't find a SQUASHFS superblock on %s\n",
+ bdevname(sb->s_bdev, b));
+ err = -EINVAL;
+ goto failed_mount;
+ }
+
+ /* Check the MAJOR & MINOR versions and compression type */
+ err = supported_squashfs_filesystem(le16_to_cpu(sblk->s_major),
+ le16_to_cpu(sblk->s_minor),
+ le16_to_cpu(sblk->compression));
+ if (err < 0)
+ goto failed_mount;
+
+ err = -EINVAL;
+
+ /*
+ * Check if there's xattrs in the filesystem. These are not
+ * supported in this version, so warn that they will be ignored.
+ */
+ if (le64_to_cpu(sblk->xattr_table_start) != SQUASHFS_INVALID_BLK)
+ ERROR("Xattrs in filesystem, these will be ignored\n");
+
+ /* Check the filesystem does not extend beyond the end of the
+ block device */
+ msblk->bytes_used = le64_to_cpu(sblk->bytes_used);
+ if (msblk->bytes_used < 0 || msblk->bytes_used >
+ i_size_read(sb->s_bdev->bd_inode))
+ goto failed_mount;
+
+ /* Check block size for sanity */
+ msblk->block_size = le32_to_cpu(sblk->block_size);
+ if (msblk->block_size > SQUASHFS_FILE_MAX_SIZE)
+ goto failed_mount;
+
+ msblk->block_log = le16_to_cpu(sblk->block_log);
+ if (msblk->block_log > SQUASHFS_FILE_MAX_LOG)
+ goto failed_mount;
+
+ /* Check the root inode for sanity */
+ root_inode = le64_to_cpu(sblk->root_inode);
+ if (SQUASHFS_INODE_OFFSET(root_inode) > SQUASHFS_METADATA_SIZE)
+ goto failed_mount;
+
+ msblk->inode_table = le64_to_cpu(sblk->inode_table_start);
+ msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
+ msblk->inodes = le32_to_cpu(sblk->inodes);
+ flags = le16_to_cpu(sblk->flags);
+
+ TRACE("Found valid superblock on %s\n", bdevname(sb->s_bdev, b));
+ TRACE("Inodes are %scompressed\n", SQUASHFS_UNCOMPRESSED_INODES(flags)
+ ? "un" : "");
+ TRACE("Data is %scompressed\n", SQUASHFS_UNCOMPRESSED_DATA(flags)
+ ? "un" : "");
+ TRACE("Filesystem size %lld bytes\n", msblk->bytes_used);
+ TRACE("Block size %d\n", msblk->block_size);
+ TRACE("Number of inodes %d\n", msblk->inodes);
+ TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments));
+ TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
+ TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
+ TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
+ TRACE("sblk->fragment_table_start %llx\n",
+ (u64) le64_to_cpu(sblk->fragment_table_start));
+ TRACE("sblk->id_table_start %llx\n",
+ (u64) le64_to_cpu(sblk->id_table_start));
+
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_flags |= MS_RDONLY;
+ sb->s_op = &squashfs_super_ops;
+
+ err = -ENOMEM;
+
+ msblk->block_cache = squashfs_cache_init("metadata",
+ SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE);
+ if (msblk->block_cache == NULL)
+ goto failed_mount;
+
+ /* Allocate read_page block */
+ msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size);
+ if (msblk->read_page == NULL) {
+ ERROR("Failed to allocate read_page block\n");
+ goto failed_mount;
+ }
+
+ /* Allocate and read id index table */
+ msblk->id_table = squashfs_read_id_index_table(sb,
+ le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids));
+ if (IS_ERR(msblk->id_table)) {
+ err = PTR_ERR(msblk->id_table);
+ msblk->id_table = NULL;
+ goto failed_mount;
+ }
+
+ fragments = le32_to_cpu(sblk->fragments);
+ if (fragments == 0)
+ goto allocate_lookup_table;
+
+ msblk->fragment_cache = squashfs_cache_init("fragment",
+ SQUASHFS_CACHED_FRAGMENTS, msblk->block_size);
+ if (msblk->fragment_cache == NULL) {
+ err = -ENOMEM;
+ goto failed_mount;
+ }
+
+ /* Allocate and read fragment index table */
+ msblk->fragment_index = squashfs_read_fragment_index_table(sb,
+ le64_to_cpu(sblk->fragment_table_start), fragments);
+ if (IS_ERR(msblk->fragment_index)) {
+ err = PTR_ERR(msblk->fragment_index);
+ msblk->fragment_index = NULL;
+ goto failed_mount;
+ }
+
+allocate_lookup_table:
+ lookup_table_start = le64_to_cpu(sblk->lookup_table_start);
+ if (lookup_table_start == SQUASHFS_INVALID_BLK)
+ goto allocate_root;
+
+ /* Allocate and read inode lookup table */
+ msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb,
+ lookup_table_start, msblk->inodes);
+ if (IS_ERR(msblk->inode_lookup_table)) {
+ err = PTR_ERR(msblk->inode_lookup_table);
+ msblk->inode_lookup_table = NULL;
+ goto failed_mount;
+ }
+
+ sb->s_export_op = &squashfs_export_ops;
+
+allocate_root:
+ root = new_inode(sb);
+ if (!root) {
+ err = -ENOMEM;
+ goto failed_mount;
+ }
+
+ err = squashfs_read_inode(root, root_inode);
+ if (err) {
+ iget_failed(root);
+ goto failed_mount;
+ }
+ insert_inode_hash(root);
+
+ sb->s_root = d_alloc_root(root);
+ if (sb->s_root == NULL) {
+ ERROR("Root inode create failed\n");
+ err = -ENOMEM;
+ iput(root);
+ goto failed_mount;
+ }
+
+ TRACE("Leaving squashfs_fill_super\n");
+ kfree(sblk);
+ return 0;
+
+failed_mount:
+ squashfs_cache_delete(msblk->block_cache);
+ squashfs_cache_delete(msblk->fragment_cache);
+ squashfs_cache_delete(msblk->read_page);
+ kfree(msblk->inode_lookup_table);
+ kfree(msblk->fragment_index);
+ kfree(msblk->id_table);
+ kfree(msblk->stream.workspace);
+ kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
+ kfree(sblk);
+ return err;
+
+failure:
+ kfree(msblk->stream.workspace);
+ kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
+ return -ENOMEM;
+}
+
+
+static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ struct squashfs_sb_info *msblk = dentry->d_sb->s_fs_info;
+
+ TRACE("Entered squashfs_statfs\n");
+
+ buf->f_type = SQUASHFS_MAGIC;
+ buf->f_bsize = msblk->block_size;
+ buf->f_blocks = ((msblk->bytes_used - 1) >> msblk->block_log) + 1;
+ buf->f_bfree = buf->f_bavail = 0;
+ buf->f_files = msblk->inodes;
+ buf->f_ffree = 0;
+ buf->f_namelen = SQUASHFS_NAME_LEN;
+
+ return 0;
+}
+
+
+static int squashfs_remount(struct super_block *sb, int *flags, char *data)
+{
+ *flags |= MS_RDONLY;
+ return 0;
+}
+
+
+static void squashfs_put_super(struct super_block *sb)
+{
+ if (sb->s_fs_info) {
+ struct squashfs_sb_info *sbi = sb->s_fs_info;
+ squashfs_cache_delete(sbi->block_cache);
+ squashfs_cache_delete(sbi->fragment_cache);
+ squashfs_cache_delete(sbi->read_page);
+ kfree(sbi->id_table);
+ kfree(sbi->fragment_index);
+ kfree(sbi->meta_index);
+ kfree(sbi->stream.workspace);
+ kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
+ }
+}
+
+
+static int squashfs_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data,
+ struct vfsmount *mnt)
+{
+ return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super,
+ mnt);
+}
+
+
+static struct kmem_cache *squashfs_inode_cachep;
+
+
+static void init_once(void *foo)
+{
+ struct squashfs_inode_info *ei = foo;
+
+ inode_init_once(&ei->vfs_inode);
+}
+
+
+static int __init init_inodecache(void)
+{
+ squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache",
+ sizeof(struct squashfs_inode_info), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, init_once);
+
+ return squashfs_inode_cachep ? 0 : -ENOMEM;
+}
+
+
+static void destroy_inodecache(void)
+{
+ kmem_cache_destroy(squashfs_inode_cachep);
+}
+
+
+static int __init init_squashfs_fs(void)
+{
+ int err = init_inodecache();
+
+ if (err)
+ return err;
+
+ err = register_filesystem(&squashfs_fs_type);
+ if (err) {
+ destroy_inodecache();
+ return err;
+ }
+
+ printk(KERN_INFO "squashfs: version 4.0 (2009/01/03) "
+ "Phillip Lougher\n");
+
+ return 0;
+}
+
+
+static void __exit exit_squashfs_fs(void)
+{
+ unregister_filesystem(&squashfs_fs_type);
+ destroy_inodecache();
+}
+
+
+static struct inode *squashfs_alloc_inode(struct super_block *sb)
+{
+ struct squashfs_inode_info *ei =
+ kmem_cache_alloc(squashfs_inode_cachep, GFP_KERNEL);
+
+ return ei ? &ei->vfs_inode : NULL;
+}
+
+
+static void squashfs_destroy_inode(struct inode *inode)
+{
+ kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
+}
+
+
+static struct file_system_type squashfs_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "squashfs",
+ .get_sb = squashfs_get_sb,
+ .kill_sb = kill_block_super,
+ .fs_flags = FS_REQUIRES_DEV
+};
+
+static struct super_operations squashfs_super_ops = {
+ .alloc_inode = squashfs_alloc_inode,
+ .destroy_inode = squashfs_destroy_inode,
+ .statfs = squashfs_statfs,
+ .put_super = squashfs_put_super,
+ .remount_fs = squashfs_remount
+};
+
+module_init(init_squashfs_fs);
+module_exit(exit_squashfs_fs);
+MODULE_DESCRIPTION("squashfs 4.0, a compressed read-only filesystem");
+MODULE_AUTHOR("Phillip Lougher <phillip@lougher.demon.co.uk>");
+MODULE_LICENSE("GPL");
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
new file mode 100644
index 000000000000..83d87880aac8
--- /dev/null
+++ b/fs/squashfs/symlink.c
@@ -0,0 +1,118 @@
+/*
+ * Squashfs - a compressed read only filesystem for Linux
+ *
+ * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ * Phillip Lougher <phillip@lougher.demon.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * symlink.c
+ */
+
+/*
+ * This file implements code to handle symbolic links.
+ *
+ * The data contents of symbolic links are stored inside the symbolic
+ * link inode within the inode table. This allows the normally small symbolic
+ * link to be compressed as part of the inode table, achieving much greater
+ * compression than if the symbolic link was compressed individually.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/zlib.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+static int squashfs_symlink_readpage(struct file *file, struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ int index = page->index << PAGE_CACHE_SHIFT;
+ u64 block = squashfs_i(inode)->start;
+ int offset = squashfs_i(inode)->offset;
+ int length = min_t(int, i_size_read(inode) - index, PAGE_CACHE_SIZE);
+ int bytes, copied;
+ void *pageaddr;
+ struct squashfs_cache_entry *entry;
+
+ TRACE("Entered squashfs_symlink_readpage, page index %ld, start block "
+ "%llx, offset %x\n", page->index, block, offset);
+
+ /*
+ * Skip index bytes into symlink metadata.
+ */
+ if (index) {
+ bytes = squashfs_read_metadata(sb, NULL, &block, &offset,
+ index);
+ if (bytes < 0) {
+ ERROR("Unable to read symlink [%llx:%x]\n",
+ squashfs_i(inode)->start,
+ squashfs_i(inode)->offset);
+ goto error_out;
+ }
+ }
+
+ /*
+ * Read length bytes from symlink metadata. Squashfs_read_metadata
+ * is not used here because it can sleep and we want to use
+ * kmap_atomic to map the page. Instead call the underlying
+ * squashfs_cache_get routine. As length bytes may overlap metadata
+ * blocks, we may need to call squashfs_cache_get multiple times.
+ */
+ for (bytes = 0; bytes < length; offset = 0, bytes += copied) {
+ entry = squashfs_cache_get(sb, msblk->block_cache, block, 0);
+ if (entry->error) {
+ ERROR("Unable to read symlink [%llx:%x]\n",
+ squashfs_i(inode)->start,
+ squashfs_i(inode)->offset);
+ squashfs_cache_put(entry);
+ goto error_out;
+ }
+
+ pageaddr = kmap_atomic(page, KM_USER0);
+ copied = squashfs_copy_data(pageaddr + bytes, entry, offset,
+ length - bytes);
+ if (copied == length - bytes)
+ memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length);
+ else
+ block = entry->next_index;
+ kunmap_atomic(pageaddr, KM_USER0);
+ squashfs_cache_put(entry);
+ }
+
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+
+error_out:
+ SetPageError(page);
+ unlock_page(page);
+ return 0;
+}
+
+
+const struct address_space_operations squashfs_symlink_aops = {
+ .readpage = squashfs_symlink_readpage
+};
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index a7c748fa977a..0f0f0cf3ba9a 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -9,6 +9,7 @@
#include <linux/string.h>
#include "do_mounts.h"
+#include "../fs/squashfs/squashfs_fs.h"
int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
@@ -41,6 +42,7 @@ static int __init crd_load(int in_fd, int out_fd);
* ext2
* romfs
* cramfs
+ * squashfs
* gzip
*/
static int __init
@@ -51,6 +53,7 @@ identify_ramdisk_image(int fd, int start_block)
struct ext2_super_block *ext2sb;
struct romfs_super_block *romfsb;
struct cramfs_super *cramfsb;
+ struct squashfs_super_block *squashfsb;
int nblocks = -1;
unsigned char *buf;
@@ -62,6 +65,7 @@ identify_ramdisk_image(int fd, int start_block)
ext2sb = (struct ext2_super_block *) buf;
romfsb = (struct romfs_super_block *) buf;
cramfsb = (struct cramfs_super *) buf;
+ squashfsb = (struct squashfs_super_block *) buf;
memset(buf, 0xe5, size);
/*
@@ -99,6 +103,16 @@ identify_ramdisk_image(int fd, int start_block)
goto done;
}
+ /* squashfs is at block zero too */
+ if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) {
+ printk(KERN_NOTICE
+ "RAMDISK: squashfs filesystem found at block %d\n",
+ start_block);
+ nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1)
+ >> BLOCK_SIZE_BITS;
+ goto done;
+ }
+
/*
* Read block 1 to test for minix and ext2 superblock
*/