I am working on making some modifications to an epub and then rezipping it. I am currently doing this in Golang. The crux of the logic is housed in this repo here. However most of the issue resides in the following logic not setting/copying the mimetype properly:
package filehandler
import (
"archive/zip"
"context"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
"github.com/pjkaufman/go-go-gadgets/pkg/logger"
"golang.org/x/sync/errgroup"
)
const (
tempZip = "compress.zip"
// have to use these or similar permissions to avoid permission denied errors in some cases
folderPerms fs.FileMode = 0755
numWorkers int = 5
)
// UnzipRunOperationAndRezip starts by deleting the destination directory if it exists,
// then it goes ahead an unzips the contents into the destination directory
// once that is done it runs the operation func on the destination folder
// lastly it rezips the folder back to compress.zip
func UnzipRunOperationAndRezip(src, dest string, operation func()) {
var err error
if FolderExists(dest) {
err = os.RemoveAll(dest)
if err != nil {
logger.WriteError(fmt.Sprintf("failed to delete the destination directory %q: %s", dest, err))
}
}
err = Unzip(src, dest)
if err != nil {
logger.WriteError(fmt.Sprintf("failed to unzip %q: %s", src, err))
}
operation()
err = Rezip(dest, tempZip)
if err != nil {
logger.WriteError(fmt.Sprintf("failed to rezip content for source %q: %s", src, err))
}
err = os.RemoveAll(dest)
if err != nil {
logger.WriteError(fmt.Sprintf("failed to cleanup the destination directory %q: %s", dest, err))
}
MustRename(src, src+".original")
MustRename(tempZip, src)
}
// Unzip is based on /a/24792688
func Unzip(src, dest string) error {
r, err := zip.OpenReader(src)
if err != nil {
return err
}
defer func() {
if err := r.Close(); err != nil {
panic(err)
}
}()
err = os.MkdirAll(dest, folderPerms)
if err != nil {
return err
}
var files = make(chan *zip.File, len(r.File))
g, ctx := errgroup.WithContext(context.Background())
for i := 0; i < numWorkers; i++ {
g.Go(func() error {
for {
select {
case file, ok := <-files:
if ok {
wErr := extractAndWriteFile(dest, file)
if wErr != nil {
return wErr
}
} else {
return nil
}
case <-ctx.Done():
return ctx.Err()
}
}
})
}
for _, f := range r.File {
files <- f
}
close(files)
return g.Wait()
}
func extractAndWriteFile(dest string, f *zip.File) error {
rc, err := f.Open()
if err != nil {
return err
}
defer func() {
if err := rc.Close(); err != nil {
panic(err)
}
}()
path := filepath.Join(dest, f.Name)
// Check for ZipSlip (Directory traversal)
if !strings.HasPrefix(path, filepath.Clean(dest)+string(os.PathSeparator)) {
return fmt.Errorf("illegal file path: %s", path)
}
if f.FileInfo().IsDir() {
err = os.MkdirAll(path, folderPerms)
if err != nil {
return err
}
} else {
err = os.MkdirAll(filepath.Dir(path), folderPerms)
if err != nil {
return err
}
f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
if err != nil {
return err
}
defer func() {
if err := f.Close(); err != nil {
panic(err)
}
}()
_, err = io.Copy(f, rc)
if err != nil {
return err
}
}
return nil
}
// Rezip is based on /a/63233911
func Rezip(src, dest string) error {
file, err := os.Create(dest)
if err != nil {
return err
}
defer file.Close()
w := zip.NewWriter(file)
defer w.Close()
walker := func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
// skip empty directories
if info.IsDir() {
return nil
}
file, err := os.Open(path)
if err != nil {
return err
}
defer file.Close()
// need a zip relative path to avoid creating extra directories inside of the zip
var zipRelativePath = strings.Replace(path, src+string(os.PathSeparator), "", 1)
f, err := w.Create(zipRelativePath)
if err != nil {
return err
}
_, err = io.Copy(f, file)
if err != nil {
return err
}
return nil
}
err = filepath.Walk(src, walker)
if err != nil {
return err
}
return nil
}
The current problem that I am facing is that when I do this and then run the epubchecker validator on the file I am getting the following message:
ERROR(PKG-006): ./filename.epub(-1,-1): Mimetype file entry is missing or is not the first file in the archive.
Validating using EPUB version 2.0.1 rules.
Check finished with errors
Messages: 0 fatals / 1 error / 0 warnings / 0 infos
EPUBCheck completed
I am not really sure what I need to do to expressly set the mimetype when I create the zip. I did at one time try adding the following logic to the rezip logic just to test if it would properly set the mimetype, but I got an error saying the zip file header was corrupted:
// Rezip is based on /a/63233911
func Rezip(src, dest, mimetype string) error {
file, err := os.Create(dest)
if err != nil {
return err
}
defer file.Close()
var mtype []byte
mtype = []byte(mimetype)
err = binary.Write(file, binary.LittleEndian, "application/epub+zip")
if err != nil {
return err
}
...
}
When setting the mimetype failed, I tried copying over the metainf data first and skipping that data in the walking logic, but got the same validation error as prior:
w := zip.NewWriter(file)
defer w.Close()
var mimetypePath = src + string(os.PathSeparator) + "META-INF/container.xml"
err = writeToZip(w, src, mimetypePath)
if err != nil {
return err
}
walker := func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
// skip empty directories
if info.IsDir() {
return nil
}
if mimetypePath == path {
return nil
}
err = writeToZip(w, src, path)
if err != nil {
return err
}
return nil
}
err = filepath.Walk(src, walker)
if err != nil {
return err
}
return nil
}
func writeToZip(w *zip.Writer, src, path string) error {
file, err := os.Open(path)
if err != nil {
return err
}
defer file.Close()
// need a zip relative path to avoid creating extra directories inside of the zip
var zipRelativePath = strings.Replace(path, src+string(os.PathSeparator), "", 1)
f, err := w.Create(zipRelativePath)
if err != nil {
return err
}
_, err = io.Copy(f, file)
if err != nil {
return err
}
return nil
}
I did see the following two questions that are similar, but they do not answer my question:
- Haskell zip package: How to add mimetype file as first file of the archive
- Implies the issue may be with capitalization or with the container.xml being compressed
- Make MIMETYPE file the first file in an EPUB ZIP file?
- Shows some commands to run via the CLI, but does not really help understanding how to fix this in pure Golang
Any ideas as to what I am doing wrong when trying to set/copy the mimetype of the zip file when I create it? Thanks for the help!