Skip to content

Commit

Permalink
Merge pull request #159 from tuttiq/master
Browse files Browse the repository at this point in the history
Fix deprecated method File.exists? to File.exist?
  • Loading branch information
knowtheory authored Aug 31, 2024
2 parents ff3b443 + 6127e39 commit 67118b8
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 23 deletions.
4 changes: 2 additions & 2 deletions lib/docsplit/image_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def convert(pdf, size, format, previous=nil)
directory = directory_for(size)
pages = @pages || '1-' + Docsplit.extract_length(pdf).to_s
escaped_pdf = ESCAPE[pdf]
FileUtils.mkdir_p(directory) unless File.exists?(directory)
FileUtils.mkdir_p(directory) unless File.exist?(directory)
common = "#{MEMORY_ARGS} -density #{@density} #{resize_arg(size)} #{quality_arg(format)}"
if previous
FileUtils.cp(Dir[directory_for(previous) + '/*'], directory)
Expand All @@ -48,7 +48,7 @@ def convert(pdf, size, format, previous=nil)
end
end
ensure
FileUtils.remove_entry_secure tempdir if File.exists?(tempdir)
FileUtils.remove_entry_secure tempdir if File.exist?(tempdir)
end


Expand Down
6 changes: 3 additions & 3 deletions lib/docsplit/page_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@ def extract(pdfs, opts)
[pdfs].flatten.each do |pdf|
pdf_name = File.basename(pdf, File.extname(pdf))
page_path = ESCAPE[File.join(@output, "#{pdf_name}")] + "_%d.pdf"
FileUtils.mkdir_p @output unless File.exists?(@output)
FileUtils.mkdir_p @output unless File.exist?(@output)

cmd = if DEPENDENCIES[:pdftailor] # prefer pdftailor, but keep pdftk for backwards compatability
"pdftailor unstitch --output #{page_path} #{ESCAPE[pdf]} 2>&1"
else
"pdftk #{ESCAPE[pdf]} burst output #{page_path} 2>&1"
end
result = `#{cmd}`.chomp
FileUtils.rm('doc_data.txt') if File.exists?('doc_data.txt')
FileUtils.rm('doc_data.txt') if File.exist?('doc_data.txt')
raise ExtractionFailed, result if $? != 0
result
end
Expand Down
30 changes: 15 additions & 15 deletions lib/docsplit/pdf_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def osx?
# Reports whether the host operating system identifier contains "linux"
# (case-insensitive). Always returns a strict boolean.
def linux?
  linux_match = HOST_OS.match(/linux/i)
  !linux_match.nil?
end

# The first line of the help output holds the name and version number
# of the office software to be used for extraction.
def version_string
Expand All @@ -35,10 +35,10 @@ def libre_office?
# Reports whether the office version banner has a line that begins with the
# OpenOffice.org product name. Always returns a strict boolean.
def open_office?
  banner_match = version_string.match(/^OpenOffice.org/)
  !banner_match.nil?
end

# A set of default locations to search for office software
# These have been extracted from JODConverter. Each listed
# path should contain a directory "program" which in turn
# path should contain a directory "program" which in turn
# contains the "soffice" executable.
# see: https://github.com/mirkonasato/jodconverter/blob/master/jodconverter-core/src/main/java/org/artofsolving/jodconverter/office/OfficeUtils.java#L63-L91
def office_search_paths
Expand Down Expand Up @@ -69,7 +69,7 @@ def office_search_paths
end
search_paths
end

# Identify the path to a working office executable.
def office_executable
paths = office_search_paths
Expand All @@ -78,10 +78,10 @@ def office_executable
# raise an error if that path isn't valid, otherwise, add
# it to the front of our search paths.
if ENV['OFFICE_PATH']
raise ArgumentError, "No such file or directory #{ENV['OFFICE_PATH']}" unless File.exists? ENV['OFFICE_PATH']
raise ArgumentError, "No such file or directory #{ENV['OFFICE_PATH']}" unless File.exist? ENV['OFFICE_PATH']
paths.unshift(ENV['OFFICE_PATH'])
end

# The location of the office executable is OS dependent
path_pieces = ["soffice"]
if windows?
Expand All @@ -91,32 +91,32 @@ def office_executable
else
path_pieces += [["program", "soffice"]]
end

# Search for the first suitable office executable
# and short-circuit once an executable is found.
paths.each do |path|
if File.exists? path
if File.exist? path
@@executable ||= path unless File.directory? path
path_pieces.each do |pieces|
check_path = File.join(path, pieces)
@@executable ||= check_path if File.exists? check_path
@@executable ||= check_path if File.exist? check_path
end
end
break if @@executable
end
raise OfficeNotFound, "No office software found" unless @@executable
@@executable
end

# Used to specify the office location for JODConverter
# Returns the directory two levels above the office executable, i.e. the
# parent of the directory that contains the executable.
def office_path
  executable_dir = File.dirname(office_executable)
  File.dirname(executable_dir)
end

# Convert documents to PDF.
def extract(docs, opts)
out = opts[:output] || '.'
FileUtils.mkdir_p out unless File.exists?(out)
FileUtils.mkdir_p out unless File.exist?(out)
[docs].flatten.each do |doc|
ext = File.extname(doc)
basename = File.basename(doc, ext)
Expand All @@ -128,7 +128,7 @@ def extract(docs, opts)
if libre_office?
# Set the LibreOffice user profile, so that parallel uses of cloudcrowd don't trip over each other.
ENV['SYSUSERCONFIG']="file://#{File.expand_path(escaped_out)}"

options = "--headless --invisible --norestore --nolockcheck --convert-to pdf --outdir #{escaped_out} #{escaped_doc}"
cmd = "#{office_executable} #{options} 2>&1"
result = `#{cmd}`.chomp
Expand All @@ -147,9 +147,9 @@ def extract(docs, opts)
LOGGING = "-Djava.util.logging.config.file=#{ESCAPED_ROOT}/vendor/logging.properties"

HEADLESS = "-Djava.awt.headless=true"

private

# Runs a Java command, with quieted logging, and the classpath set properly.
def run_jod(command, pdfs, opts, return_output=false)

Expand Down
4 changes: 2 additions & 2 deletions lib/docsplit/text_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def initialize
# Extract text from a list of PDFs.
def extract(pdfs, opts)
extract_options opts
FileUtils.mkdir_p @output unless File.exists?(@output)
FileUtils.mkdir_p @output unless File.exist?(@output)
[pdfs].flatten.each do |pdf|
@pdf_name = File.basename(pdf, File.extname(pdf))
pages = (@pages == 'all') ? 1..Docsplit.extract_length(pdf) : @pages
Expand Down Expand Up @@ -80,7 +80,7 @@ def extract_from_ocr(pdf, pages)
clean_text(base_path + '.txt') if @clean_ocr
end
ensure
FileUtils.remove_entry_secure tempdir if File.exists?(tempdir)
FileUtils.remove_entry_secure tempdir if File.exist?(tempdir)
end


Expand Down
2 changes: 1 addition & 1 deletion test/test_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Minitest::Test
OUTPUT = 'test/output'

def clear_output
FileUtils.rm_r(OUTPUT) if File.exists?(OUTPUT)
FileUtils.rm_r(OUTPUT) if File.exist?(OUTPUT)
end

def teardown
Expand Down

0 comments on commit 67118b8

Please sign in to comment.